{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from pymongo import MongoClient\n",
    "# json_normalize is a top-level pandas function since pandas 1.0; the pandas.io.json\n",
    "# location is deprecated, so try the public path first and fall back for old installs.\n",
    "try:\n",
    "    from pandas import json_normalize\n",
    "except ImportError:\n",
    "    from pandas.io.json import json_normalize\n",
    "\n",
    "plt.style.use('ggplot')\n",
    "import matplotlib as mpl  # same module object that `from pylab import mpl` exposed\n",
    "mpl.rcParams['font.sans-serif'] = ['SimHei']  # font with Chinese glyphs so CJK labels render in seaborn/matplotlib\n",
    "plt.rc('figure', figsize=(10, 10))  # enlarge matplotlib's default figure size\n",
    "# Only axes.unicode_minus is meant to be switched off. The previous chained assignment\n",
    "# (`plt.rcParams[\"figure.dpi\"] = mpl.rcParams[...] = False`) also wrote False into\n",
    "# figure.dpi, which is not a valid resolution; figure.dpi is now left at its default.\n",
    "mpl.rcParams['axes.unicode_minus'] = False  # keep the minus sign '-' from being drawn as a box in saved figures\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Client for the MongoDB server on localhost, default port\n",
    "conn = MongoClient('127.0.0.1', 27017)\n",
    "db = conn['KrisWu']  # handle to the KrisWu database\n",
    "\n",
    "repost = db['repost']  # handle to the `repost` collection\n",
    "mon_data = repost.find()  # cursor over every document in the collection (no filter)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the query inside this cell: a pymongo cursor can be iterated only once, so building\n",
    "# the frame from the earlier `mon_data` cursor gives an empty result when the cell is re-run.\n",
    "# json_normalize flattens nested sub-documents into dotted column names (e.g. `user.gender`).\n",
    "data = json_normalize(list(repost.find()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 102118 entries, 0 to 102117\n",
      "Columns: 111 entries, _id to version\n",
      "dtypes: bool(10), float64(59), int64(19), object(23)\n",
      "memory usage: 79.7+ MB\n"
     ]
    }
   ],
   "source": [
    "# Summary of the raw frame: index range, number of columns, dtype counts and memory usage\n",
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>ad_state</th>\n",
       "      <th>attitudes_count</th>\n",
       "      <th>bid</th>\n",
       "      <th>can_edit</th>\n",
       "      <th>cardid</th>\n",
       "      <th>comments_count</th>\n",
       "      <th>content_auth</th>\n",
       "      <th>created_at</th>\n",
       "      <th>darwin_tags</th>\n",
       "      <th>...</th>\n",
       "      <th>user.profile_image_url</th>\n",
       "      <th>user.profile_url</th>\n",
       "      <th>user.screen_name</th>\n",
       "      <th>user.statuses_count</th>\n",
       "      <th>user.urank</th>\n",
       "      <th>user.verified</th>\n",
       "      <th>user.verified_reason</th>\n",
       "      <th>user.verified_type</th>\n",
       "      <th>user.verified_type_ext</th>\n",
       "      <th>version</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>71066</th>\n",
       "      <td>5cb9e2c4b4fbcfda28bdb098</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>HqnZC1nct</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4小时前</td>\n",
       "      <td>[]</td>\n",
       "      <td>...</td>\n",
       "      <td>https://tvax4.sinaimg.cn/crop.0.0.750.750.180/...</td>\n",
       "      <td>https://m.weibo.cn/u/7042796303?uid=7042796303</td>\n",
       "      <td>不吃辣怎么活得下去</td>\n",
       "      <td>23</td>\n",
       "      <td>4</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47793</th>\n",
       "      <td>5cb9a121b4fbcfda28bca1d3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>HqmZzzffi</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>25分钟前</td>\n",
       "      <td>[]</td>\n",
       "      <td>...</td>\n",
       "      <td>https://tvax4.sinaimg.cn/crop.0.0.512.512.180/...</td>\n",
       "      <td>https://m.weibo.cn/u/5273246409?uid=5273246409</td>\n",
       "      <td>Joeyyll</td>\n",
       "      <td>210</td>\n",
       "      <td>9</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79861</th>\n",
       "      <td>5cb9e91db4fbcfda28be1d3b</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>HqnKc6bnk</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4小时前</td>\n",
       "      <td>[]</td>\n",
       "      <td>...</td>\n",
       "      <td>https://tvax1.sinaimg.cn/crop.0.0.996.996.180/...</td>\n",
       "      <td>https://m.weibo.cn/u/6039314470?uid=6039314470</td>\n",
       "      <td>黎黎哩嘿</td>\n",
       "      <td>56</td>\n",
       "      <td>4</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25852</th>\n",
       "      <td>5cb98bc7b4fbcfda28bbb805</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>Hqms1EPTQ</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8小时前</td>\n",
       "      <td>[]</td>\n",
       "      <td>...</td>\n",
       "      <td>https://tvax2.sinaimg.cn/crop.0.0.512.512.180/...</td>\n",
       "      <td>https://m.weibo.cn/u/5641981190?uid=5641981190</td>\n",
       "      <td>cococonutpalm</td>\n",
       "      <td>205</td>\n",
       "      <td>28</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50153</th>\n",
       "      <td>5cb9a332b4fbcfda28bcbd8e</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>HqmY30Dsf</td>\n",
       "      <td>False</td>\n",
       "      <td>star_095</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6小时前</td>\n",
       "      <td>[]</td>\n",
       "      <td>...</td>\n",
       "      <td>https://tvax2.sinaimg.cn/crop.0.0.996.996.180/...</td>\n",
       "      <td>https://m.weibo.cn/u/5208873191?uid=5208873191</td>\n",
       "      <td>DK哥哥的cute妹妹粉</td>\n",
       "      <td>86</td>\n",
       "      <td>9</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 111 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                            _id  ad_state  attitudes_count        bid  \\\n",
       "71066  5cb9e2c4b4fbcfda28bdb098       NaN                0  HqnZC1nct   \n",
       "47793  5cb9a121b4fbcfda28bca1d3       NaN                0  HqmZzzffi   \n",
       "79861  5cb9e91db4fbcfda28be1d3b       NaN                0  HqnKc6bnk   \n",
       "25852  5cb98bc7b4fbcfda28bbb805       NaN                0  Hqms1EPTQ   \n",
       "50153  5cb9a332b4fbcfda28bcbd8e       NaN                0  HqmY30Dsf   \n",
       "\n",
       "       can_edit    cardid  comments_count  content_auth created_at  \\\n",
       "71066     False       NaN               0             0       4小时前   \n",
       "47793     False       NaN               0             0      25分钟前   \n",
       "79861     False       NaN               0             0       4小时前   \n",
       "25852     False       NaN               0             0       8小时前   \n",
       "50153     False  star_095               0             0       6小时前   \n",
       "\n",
       "      darwin_tags   ...                                user.profile_image_url  \\\n",
       "71066          []   ...     https://tvax4.sinaimg.cn/crop.0.0.750.750.180/...   \n",
       "47793          []   ...     https://tvax4.sinaimg.cn/crop.0.0.512.512.180/...   \n",
       "79861          []   ...     https://tvax1.sinaimg.cn/crop.0.0.996.996.180/...   \n",
       "25852          []   ...     https://tvax2.sinaimg.cn/crop.0.0.512.512.180/...   \n",
       "50153          []   ...     https://tvax2.sinaimg.cn/crop.0.0.996.996.180/...   \n",
       "\n",
       "                                     user.profile_url  user.screen_name  \\\n",
       "71066  https://m.weibo.cn/u/7042796303?uid=7042796303         不吃辣怎么活得下去   \n",
       "47793  https://m.weibo.cn/u/5273246409?uid=5273246409           Joeyyll   \n",
       "79861  https://m.weibo.cn/u/6039314470?uid=6039314470              黎黎哩嘿   \n",
       "25852  https://m.weibo.cn/u/5641981190?uid=5641981190     cococonutpalm   \n",
       "50153  https://m.weibo.cn/u/5208873191?uid=5208873191      DK哥哥的cute妹妹粉   \n",
       "\n",
       "       user.statuses_count  user.urank  user.verified user.verified_reason  \\\n",
       "71066                   23           4          False                  NaN   \n",
       "47793                  210           9          False                  NaN   \n",
       "79861                   56           4          False                  NaN   \n",
       "25852                  205          28          False                  NaN   \n",
       "50153                   86           9          False                  NaN   \n",
       "\n",
       "       user.verified_type user.verified_type_ext  version  \n",
       "71066                  -1                    NaN      NaN  \n",
       "47793                  -1                    NaN      NaN  \n",
       "79861                  -1                    NaN      NaN  \n",
       "25852                  -1                    NaN      NaN  \n",
       "50153                  -1                    NaN      NaN  \n",
       "\n",
       "[5 rows x 111 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fixed seed so the previewed rows are the same on every Restart & Run All\n",
    "data.sample(5, random_state=42)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 1. 数据清洗\n",
    "由于数据入库的时候没有进行清洗，所以数据多出了很多没用的字段，需要先清洗掉"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['_id', 'ad_state', 'attitudes_count', 'bid', 'can_edit', 'cardid', 'comments_count', 'content_auth', 'created_at', 'darwin_tags', 'edit_at', 'edit_count', 'expire_time', 'favorited', 'hide_flag', 'hide_hot_flow', 'id', 'isLongText', 'is_imported_topic', 'is_paid', 'mblog_vip_type', 'mblogtype', 'mid', 'more_info_type', 'pending_approval_count', 'pic_ids', 'pic_types', 'pid', 'raw_text', 'reposts_count', 'reward_exhibition_type', 'show_additional_indication', 'source', 'sync_mblog', 'topic_id', 'user.avatar_hd', 'user.badge.ali_1688', 'user.badge.anniversary', 'user.badge.asiad_2018', 'user.badge.avengers_2019', 'user.badge.bind_taobao', 'user.badge.cz_wed_2017', 'user.badge.dailv', 'user.badge.dailv_2018', 'user.badge.denglong_2019', 'user.badge.double11_2018', 'user.badge.dzwbqlx_2016', 'user.badge.follow_whitelist_video', 'user.badge.fools_day_2016', 'user.badge.fu_2019', 'user.badge.gongyi', 'user.badge.gongyi_level', 'user.badge.hongbaofei_2019', 'user.badge.inspector', 'user.badge.kpl_2018', 'user.badge.league_badge', 'user.badge.league_badge_2018', 'user.badge.lol_gm_2017', 'user.badge.lol_msi_2017', 'user.badge.lol_s8', 'user.badge.meilizhongguo_2018', 'user.badge.memorial_2018', 'user.badge.national_day_2018', 'user.badge.panda', 'user.badge.qixi_2018', 'user.badge.relation_display', 'user.badge.self_media', 'user.badge.status_visible', 'user.badge.suishoupai_2018', 'user.badge.super_star_2017', 'user.badge.super_star_2018', 'user.badge.taobao', 'user.badge.travel_2017', 'user.badge.uefa_euro_2016', 'user.badge.unread_pool', 'user.badge.unread_pool_ext', 'user.badge.user_name_certificate', 'user.badge.v_influence_2018', 'user.badge.video_attention', 'user.badge.vip_activity2', 'user.badge.wbzy_2018', 'user.badge.weibo_display_fans', 'user.badge.wenchuan_10th', 'user.badge.wenda_v2', 'user.badge.womensday_2018', 'user.badge.worldcup_2018', 'user.badge.yiqijuan_2018', 'user.badge.zongyiji', 'user.close_blue_v', 'user.cover_image_phone', 
'user.description', 'user.follow_count', 'user.follow_me', 'user.followers_count', 'user.following', 'user.gender', 'user.id', 'user.like', 'user.like_me', 'user.mbrank', 'user.mbtype', 'user.profile_image_url', 'user.profile_url', 'user.screen_name', 'user.statuses_count', 'user.urank', 'user.verified', 'user.verified_reason', 'user.verified_type', 'user.verified_type_ext', 'version']\n"
     ]
    }
   ],
   "source": [
    "# Print every column name on one line to decide which fields to keep\n",
    "print(data.columns.tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns kept for the analysis; the order here is the column order of the trimmed frame.\n",
    "in_columns = [\n",
    "    # fields of the repost itself\n",
    "    'attitudes_count', 'comments_count', 'reposts_count',\n",
    "    'mid', 'raw_text', 'source',\n",
    "    # flattened `user.*` fields of the reposting account\n",
    "    'user.description', 'user.follow_count', 'user.followers_count',\n",
    "    'user.gender', 'user.id', 'user.mbrank', 'user.mbtype',\n",
    "    'user.profile_url', 'user.profile_image_url', 'user.screen_name',\n",
    "    'user.statuses_count', 'user.urank', 'user.verified',\n",
    "    'user.verified_reason',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the selected columns. The explicit copy makes the result an independent frame,\n",
    "# so later column assignments on `data` do not raise SettingWithCopyWarning.\n",
    "data = data[in_columns].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 102118 entries, 0 to 102117\n",
      "Data columns (total 20 columns):\n",
      "attitudes_count           102118 non-null int64\n",
      "comments_count            102118 non-null int64\n",
      "reposts_count             102118 non-null int64\n",
      "mid                       102118 non-null object\n",
      "raw_text                  102118 non-null object\n",
      "source                    102118 non-null object\n",
      "user.description          102118 non-null object\n",
      "user.follow_count         102118 non-null int64\n",
      "user.followers_count      102118 non-null int64\n",
      "user.gender               102118 non-null object\n",
      "user.id                   102118 non-null int64\n",
      "user.mbrank               102118 non-null int64\n",
      "user.mbtype               102118 non-null int64\n",
      "user.profile_url          102118 non-null object\n",
      "user.profile_image_url    102118 non-null object\n",
      "user.screen_name          102118 non-null object\n",
      "user.statuses_count       102118 non-null int64\n",
      "user.urank                102118 non-null int64\n",
      "user.verified             102118 non-null bool\n",
      "user.verified_reason      5260 non-null object\n",
      "dtypes: bool(1), int64(10), object(9)\n",
      "memory usage: 14.9+ MB\n"
     ]
    }
   ],
   "source": [
    "# Check the trimmed frame: the 20 columns of `in_columns` with their non-null counts and dtypes\n",
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the trimmed frame next to the notebook, without writing the row index\n",
    "data.to_csv(path_or_buf='kriswu.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "问题：\n",
    "1. 吴亦凡该微博的转发是否存在假流量？\n",
    "2. 大家对于《大碗宽面》怎么看？\n",
    "3. 有多少人拿吴亦凡跟蔡徐坤做对比？\n",
    "4. 有多少人开始路转粉了？\n",
    "5. 评论的词云图"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. 吴亦凡该微博的转发是否存在假流量？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "f    77279\n",
       "m    24839\n",
       "Name: user.gender, dtype: int64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Start with the gender split of the reposting users: one count per gender code\n",
    "fans_num = data.loc[:, 'user.gender'].value_counts()\n",
    "fans_num"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 376,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<script>\n",
       "    require.config({\n",
       "        paths: {\n",
       "            'echarts': '/nbextensions/echarts/echarts.min'\n",
       "        }\n",
       "    });\n",
       "</script>\n",
       "    <div id=\"71944d8c8de1485d8435c284a18aa66b\" style=\"width:600px;height:500px;\"></div>\n",
       "\n",
       "\n",
       "<script>\n",
       "    require(['echarts'], function(echarts) {\n",
       "        \n",
       "var myChart_71944d8c8de1485d8435c284a18aa66b = echarts.init(document.getElementById('71944d8c8de1485d8435c284a18aa66b'), null, {renderer: 'canvas'});\n",
       "var option_71944d8c8de1485d8435c284a18aa66b = {\n",
       "    \"title\": [\n",
       "        {\n",
       "            \"text\": \"\\u5434\\u4ea6\\u51e1\\u7c89\\u4e1d\\u6027\\u522b\\u6bd4\\u4f8b\\u521d\\u63a2\",\n",
       "            \"subtext\": \"\",\n",
       "            \"left\": \"auto\",\n",
       "            \"top\": \"auto\",\n",
       "            \"textStyle\": {\n",
       "                \"color\": \"#000\",\n",
       "                \"fontSize\": 18\n",
       "            },\n",
       "            \"subtextStyle\": {\n",
       "                \"color\": \"#aaa\",\n",
       "                \"fontSize\": 12\n",
       "            }\n",
       "        }\n",
       "    ],\n",
       "    \"toolbox\": {\n",
       "        \"show\": true,\n",
       "        \"orient\": \"vertical\",\n",
       "        \"left\": \"95%\",\n",
       "        \"top\": \"center\",\n",
       "        \"feature\": {\n",
       "            \"saveAsImage\": {\n",
       "                \"show\": true,\n",
       "                \"title\": \"\\u4e0b\\u8f7d\\u56fe\\u7247\"\n",
       "            },\n",
       "            \"restore\": {\n",
       "                \"show\": true\n",
       "            },\n",
       "            \"dataView\": {\n",
       "                \"show\": true\n",
       "            }\n",
       "        }\n",
       "    },\n",
       "    \"series_id\": 2474944,\n",
       "    \"tooltip\": {\n",
       "        \"trigger\": \"item\",\n",
       "        \"triggerOn\": \"mousemove|click\",\n",
       "        \"axisPointer\": {\n",
       "            \"type\": \"line\"\n",
       "        },\n",
       "        \"formatter\": null,\n",
       "        \"textStyle\": {\n",
       "            \"color\": \"#fff\",\n",
       "            \"fontSize\": 14\n",
       "        },\n",
       "        \"backgroundColor\": \"rgba(50,50,50,0.7)\",\n",
       "        \"borderColor\": \"#333\",\n",
       "        \"borderWidth\": 0\n",
       "    },\n",
       "    \"series\": [\n",
       "        {\n",
       "            \"type\": \"bar\",\n",
       "            \"name\": \"(\\u603b\\u6570\\u636e102118\\u6761)\",\n",
       "            \"data\": [\n",
       "                77279.0,\n",
       "                24839.0\n",
       "            ],\n",
       "            \"stack\": \"stack_2474944\",\n",
       "            \"barCategoryGap\": \"20%\",\n",
       "            \"label\": {\n",
       "                \"normal\": {\n",
       "                    \"show\": true,\n",
       "                    \"position\": \"top\",\n",
       "                    \"textStyle\": {\n",
       "                        \"color\": \"#000\",\n",
       "                        \"fontSize\": 12\n",
       "                    },\n",
       "                    \"formatter\": null\n",
       "                },\n",
       "                \"emphasis\": {\n",
       "                    \"show\": true,\n",
       "                    \"position\": null,\n",
       "                    \"textStyle\": {\n",
       "                        \"color\": \"#fff\",\n",
       "                        \"fontSize\": 12\n",
       "                    }\n",
       "                }\n",
       "            },\n",
       "            \"markPoint\": {\n",
       "                \"data\": []\n",
       "            },\n",
       "            \"markLine\": {\n",
       "                \"data\": []\n",
       "            },\n",
       "            \"seriesId\": 2474944\n",
       "        }\n",
       "    ],\n",
       "    \"legend\": [\n",
       "        {\n",
       "            \"data\": [\n",
       "                \"(\\u603b\\u6570\\u636e102118\\u6761)\"\n",
       "            ],\n",
       "            \"selectedMode\": \"multiple\",\n",
       "            \"show\": true,\n",
       "            \"left\": \"center\",\n",
       "            \"top\": \"top\",\n",
       "            \"orient\": \"horizontal\",\n",
       "            \"textStyle\": {\n",
       "                \"fontSize\": 12,\n",
       "                \"color\": \"#333\"\n",
       "            }\n",
       "        }\n",
       "    ],\n",
       "    \"backgroundColor\": \"#fff\",\n",
       "    \"xAxis\": [\n",
       "        {\n",
       "            \"name\": \"\",\n",
       "            \"show\": true,\n",
       "            \"nameLocation\": \"middle\",\n",
       "            \"nameGap\": 25,\n",
       "            \"nameTextStyle\": {\n",
       "                \"fontSize\": 14\n",
       "            },\n",
       "            \"axisLabel\": {\n",
       "                \"interval\": \"auto\",\n",
       "                \"rotate\": 0,\n",
       "                \"margin\": 8,\n",
       "                \"textStyle\": {\n",
       "                    \"fontSize\": 20,\n",
       "                    \"color\": \"#000\"\n",
       "                }\n",
       "            },\n",
       "            \"axisTick\": {\n",
       "                \"alignWithLabel\": false\n",
       "            },\n",
       "            \"inverse\": false,\n",
       "            \"position\": null,\n",
       "            \"boundaryGap\": true,\n",
       "            \"min\": null,\n",
       "            \"max\": null,\n",
       "            \"data\": [\n",
       "                \"\\u5973\",\n",
       "                \"\\u7537\"\n",
       "            ],\n",
       "            \"type\": \"category\"\n",
       "        }\n",
       "    ],\n",
       "    \"yAxis\": [\n",
       "        {\n",
       "            \"name\": \"\",\n",
       "            \"show\": true,\n",
       "            \"nameLocation\": \"middle\",\n",
       "            \"nameGap\": 25,\n",
       "            \"nameTextStyle\": {\n",
       "                \"fontSize\": 14\n",
       "            },\n",
       "            \"axisLabel\": {\n",
       "                \"formatter\": \"{value} \",\n",
       "                \"rotate\": 0,\n",
       "                \"interval\": \"auto\",\n",
       "                \"margin\": 8,\n",
       "                \"textStyle\": {\n",
       "                    \"fontSize\": 14,\n",
       "                    \"color\": \"#000\"\n",
       "                }\n",
       "            },\n",
       "            \"axisTick\": {\n",
       "                \"alignWithLabel\": false\n",
       "            },\n",
       "            \"inverse\": false,\n",
       "            \"position\": null,\n",
       "            \"boundaryGap\": true,\n",
       "            \"min\": null,\n",
       "            \"max\": null,\n",
       "            \"splitLine\": {\n",
       "                \"show\": true\n",
       "            },\n",
       "            \"type\": \"value\"\n",
       "        }\n",
       "    ],\n",
       "    \"color\": [\n",
       "        \"#c23531\",\n",
       "        \"#2f4554\",\n",
       "        \"#61a0a8\",\n",
       "        \"#d48265\",\n",
       "        \"#749f83\",\n",
       "        \"#ca8622\",\n",
       "        \"#bda29a\",\n",
       "        \"#6e7074\",\n",
       "        \"#546570\",\n",
       "        \"#c4ccd3\",\n",
       "        \"#f05b72\",\n",
       "        \"#ef5b9c\",\n",
       "        \"#f47920\",\n",
       "        \"#905a3d\",\n",
       "        \"#fab27b\",\n",
       "        \"#2a5caa\",\n",
       "        \"#444693\",\n",
       "        \"#726930\",\n",
       "        \"#b2d235\",\n",
       "        \"#6d8346\",\n",
       "        \"#ac6767\",\n",
       "        \"#1d953f\",\n",
       "        \"#6950a1\",\n",
       "        \"#918597\",\n",
       "        \"#f6f5ec\"\n",
       "    ]\n",
       "};\n",
       "myChart_71944d8c8de1485d8435c284a18aa66b.setOption(option_71944d8c8de1485d8435c284a18aa66b);\n",
       "\n",
       "    });\n",
       "</script>\n"
      ],
      "text/plain": [
       "<pyecharts.charts.bar.Bar at 0x1252c55f8>"
      ]
     },
     "execution_count": 376,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pyecharts import Bar\n",
    "\n",
    "# Build the x-axis labels from the Series index so every bar is labelled with the gender\n",
    "# code it really counts, instead of assuming value_counts() lists 'f' before 'm'.\n",
    "# Codes without a translation are shown unchanged.\n",
    "gender_labels = {'f': '女', 'm': '男'}\n",
    "bar_labels = [gender_labels.get(code, code) for code in fans_num.index]\n",
    "\n",
    "bar = Bar(\"吴亦凡粉丝性别比例初探\", width=600, height=500)\n",
    "# The legend entry states the number of records; compute it from the plotted counts\n",
    "# rather than hard-coding 102118 so it stays correct if the collection changes.\n",
    "bar.add(\"(总数据{}条)\".format(fans_num.sum()), bar_labels, fans_num.values, is_stack=True,\n",
    "        xaxis_label_textsize=20, yaxis_label_textsize=14, is_label_show=True)\n",
    "bar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "f    75.68\n",
       "m    24.32\n",
       "Name: user.gender, dtype: float64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Share of each gender in percent, rounded to two decimals\n",
    "(fans_num / fans_num.sum() * 100).round(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 378,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>attitudes_count</th>\n",
       "      <th>comments_count</th>\n",
       "      <th>reposts_count</th>\n",
       "      <th>mid</th>\n",
       "      <th>raw_text</th>\n",
       "      <th>source</th>\n",
       "      <th>user.description</th>\n",
       "      <th>user.follow_count</th>\n",
       "      <th>user.followers_count</th>\n",
       "      <th>user.gender</th>\n",
       "      <th>user.id</th>\n",
       "      <th>user.mbrank</th>\n",
       "      <th>user.mbtype</th>\n",
       "      <th>user.profile_url</th>\n",
       "      <th>user.profile_image_url</th>\n",
       "      <th>user.screen_name</th>\n",
       "      <th>user.statuses_count</th>\n",
       "      <th>user.urank</th>\n",
       "      <th>user.verified</th>\n",
       "      <th>user.verified_reason</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>41706</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4362807157558622</td>\n",
       "      <td>😂️😂️😂️</td>\n",
       "      <td>前置双摄vivo X9</td>\n",
       "      <td>💛想要那种无所畏惧的心甘情愿</td>\n",
       "      <td>295</td>\n",
       "      <td>196</td>\n",
       "      <td>f</td>\n",
       "      <td>5662041771</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>https://m.weibo.cn/u/5662041771?uid=5662041771</td>\n",
       "      <td>https://tvax1.sinaimg.cn/crop.0.0.996.996.180/...</td>\n",
       "      <td>精分少女不太冷</td>\n",
       "      <td>710</td>\n",
       "      <td>24</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39079</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4362798714648990</td>\n",
       "      <td>嘻嘻</td>\n",
       "      <td>iPhone客户端</td>\n",
       "      <td>我要好运👍</td>\n",
       "      <td>331</td>\n",
       "      <td>245</td>\n",
       "      <td>f</td>\n",
       "      <td>5031367390</td>\n",
       "      <td>2</td>\n",
       "      <td>11</td>\n",
       "      <td>https://m.weibo.cn/u/5031367390?uid=5031367390</td>\n",
       "      <td>https://tvax1.sinaimg.cn/crop.0.0.512.512.180/...</td>\n",
       "      <td>不吃胡萝卜的小王子yoo</td>\n",
       "      <td>806</td>\n",
       "      <td>30</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45108</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4362847078920385</td>\n",
       "      <td>//@舒淇:肚子暖暖 心就暖 [色][色][色]</td>\n",
       "      <td>荣耀手机 勇敢做自己</td>\n",
       "      <td>唯早晨和吴亦凡不可辜负♥</td>\n",
       "      <td>170</td>\n",
       "      <td>99</td>\n",
       "      <td>f</td>\n",
       "      <td>5540629792</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>https://m.weibo.cn/u/5540629792?uid=5540629792</td>\n",
       "      <td>https://tvax1.sinaimg.cn/crop.0.0.996.996.180/...</td>\n",
       "      <td>F君的Rachel</td>\n",
       "      <td>3988</td>\n",
       "      <td>4</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51390</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4362855135966200</td>\n",
       "      <td>转发微博</td>\n",
       "      <td>前后2000万 OPPO R11</td>\n",
       "      <td>从你的全世界路过。</td>\n",
       "      <td>197</td>\n",
       "      <td>54</td>\n",
       "      <td>f</td>\n",
       "      <td>5469881353</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>https://m.weibo.cn/u/5469881353?uid=5469881353</td>\n",
       "      <td>https://tvax4.sinaimg.cn/crop.0.0.996.996.180/...</td>\n",
       "      <td>大长腿mimo</td>\n",
       "      <td>45</td>\n",
       "      <td>9</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88521</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4362871112369317</td>\n",
       "      <td>respect</td>\n",
       "      <td>vivo X20全面屏手机</td>\n",
       "      <td>( • ̀ω ⁃᷄)✧</td>\n",
       "      <td>48</td>\n",
       "      <td>181</td>\n",
       "      <td>f</td>\n",
       "      <td>6574764125</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>https://m.weibo.cn/u/6574764125?uid=6574764125</td>\n",
       "      <td>https://tvax1.sinaimg.cn/crop.0.0.996.996.180/...</td>\n",
       "      <td>你是我的小哥哥呐_</td>\n",
       "      <td>51</td>\n",
       "      <td>9</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       attitudes_count  comments_count  reposts_count               mid  \\\n",
       "41706                0               0              0  4362807157558622   \n",
       "39079                0               0              0  4362798714648990   \n",
       "45108                0               0              0  4362847078920385   \n",
       "51390                0               0              0  4362855135966200   \n",
       "88521                0               0              0  4362871112369317   \n",
       "\n",
       "                       raw_text            source user.description  \\\n",
       "41706                    😂️😂️😂️       前置双摄vivo X9   💛想要那种无所畏惧的心甘情愿   \n",
       "39079                        嘻嘻         iPhone客户端            我要好运👍   \n",
       "45108  //@舒淇:肚子暖暖 心就暖 [色][色][色]        荣耀手机 勇敢做自己     唯早晨和吴亦凡不可辜负♥   \n",
       "51390                      转发微博  前后2000万 OPPO R11        从你的全世界路过。   \n",
       "88521                   respect     vivo X20全面屏手机      ( • ̀ω ⁃᷄)✧   \n",
       "\n",
       "       user.follow_count  user.followers_count user.gender     user.id  \\\n",
       "41706                295                   196           f  5662041771   \n",
       "39079                331                   245           f  5031367390   \n",
       "45108                170                    99           f  5540629792   \n",
       "51390                197                    54           f  5469881353   \n",
       "88521                 48                   181           f  6574764125   \n",
       "\n",
       "       user.mbrank  user.mbtype  \\\n",
       "41706            2            2   \n",
       "39079            2           11   \n",
       "45108            1            2   \n",
       "51390            0            0   \n",
       "88521            0            0   \n",
       "\n",
       "                                     user.profile_url  \\\n",
       "41706  https://m.weibo.cn/u/5662041771?uid=5662041771   \n",
       "39079  https://m.weibo.cn/u/5031367390?uid=5031367390   \n",
       "45108  https://m.weibo.cn/u/5540629792?uid=5540629792   \n",
       "51390  https://m.weibo.cn/u/5469881353?uid=5469881353   \n",
       "88521  https://m.weibo.cn/u/6574764125?uid=6574764125   \n",
       "\n",
       "                                  user.profile_image_url user.screen_name  \\\n",
       "41706  https://tvax1.sinaimg.cn/crop.0.0.996.996.180/...          精分少女不太冷   \n",
       "39079  https://tvax1.sinaimg.cn/crop.0.0.512.512.180/...     不吃胡萝卜的小王子yoo   \n",
       "45108  https://tvax1.sinaimg.cn/crop.0.0.996.996.180/...        F君的Rachel   \n",
       "51390  https://tvax4.sinaimg.cn/crop.0.0.996.996.180/...          大长腿mimo   \n",
       "88521  https://tvax1.sinaimg.cn/crop.0.0.996.996.180/...        你是我的小哥哥呐_   \n",
       "\n",
       "       user.statuses_count  user.urank  user.verified user.verified_reason  \n",
       "41706                  710          24          False                  NaN  \n",
       "39079                  806          30          False                  NaN  \n",
       "45108                 3988           4          False                  NaN  \n",
       "51390                   45           9          False                  NaN  \n",
       "88521                   51           9          False                  NaN  "
      ]
     },
     "execution_count": 378,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[data['user.gender']=='f'].sample(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 473,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>attitudes_count</th>\n",
       "      <th>comments_count</th>\n",
       "      <th>reposts_count</th>\n",
       "      <th>mid</th>\n",
       "      <th>raw_text</th>\n",
       "      <th>source</th>\n",
       "      <th>user.description</th>\n",
       "      <th>user.follow_count</th>\n",
       "      <th>user.followers_count</th>\n",
       "      <th>user.gender</th>\n",
       "      <th>user.id</th>\n",
       "      <th>user.mbrank</th>\n",
       "      <th>user.mbtype</th>\n",
       "      <th>user.profile_url</th>\n",
       "      <th>user.profile_image_url</th>\n",
       "      <th>user.screen_name</th>\n",
       "      <th>user.statuses_count</th>\n",
       "      <th>user.urank</th>\n",
       "      <th>user.verified</th>\n",
       "      <th>user.verified_reason</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>58187</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4362908001173820</td>\n",
       "      <td>吴亦凡对不起[二哈][二哈][二哈][二哈]</td>\n",
       "      <td>前后2000万 OPPO R11</td>\n",
       "      <td>喝了王老吉成为基佬王的男人</td>\n",
       "      <td>569</td>\n",
       "      <td>165</td>\n",
       "      <td>m</td>\n",
       "      <td>3607455341</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>https://m.weibo.cn/u/3607455341?uid=3607455341</td>\n",
       "      <td>https://tvax1.sinaimg.cn/crop.0.0.996.996.180/...</td>\n",
       "      <td>香香鸡啊</td>\n",
       "      <td>48</td>\n",
       "      <td>14</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10835</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4362811440290233</td>\n",
       "      <td>竟然很好听？😂 😂 不错哦</td>\n",
       "      <td>OPPO R11s Plus</td>\n",
       "      <td>陪在身边才算拥有，爱到习惯才叫长久。</td>\n",
       "      <td>297</td>\n",
       "      <td>100</td>\n",
       "      <td>m</td>\n",
       "      <td>5628399052</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>https://m.weibo.cn/u/5628399052?uid=5628399052</td>\n",
       "      <td>https://tvax1.sinaimg.cn/crop.0.0.996.996.180/...</td>\n",
       "      <td>今年要更加努力xx</td>\n",
       "      <td>764</td>\n",
       "      <td>14</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12116</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4362800471640602</td>\n",
       "      <td>吴亦凡牛逼</td>\n",
       "      <td>Android客户端</td>\n",
       "      <td></td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>m</td>\n",
       "      <td>6721476489</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>https://m.weibo.cn/u/6721476489?uid=6721476489</td>\n",
       "      <td>https://tvax2.sinaimg.cn/default/images/defaul...</td>\n",
       "      <td>用户6721476489</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4362776065372823</td>\n",
       "      <td>今天我就是51粉丝了[doge]</td>\n",
       "      <td>HUAWEI P20</td>\n",
       "      <td>到底我也是个高傲的成年…</td>\n",
       "      <td>369</td>\n",
       "      <td>248</td>\n",
       "      <td>m</td>\n",
       "      <td>5171942624</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>https://m.weibo.cn/u/5171942624?uid=5171942624</td>\n",
       "      <td>https://tvax4.sinaimg.cn/crop.0.0.996.996.180/...</td>\n",
       "      <td>小羊的百事快落水</td>\n",
       "      <td>1054</td>\n",
       "      <td>31</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41846</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4362810878222659</td>\n",
       "      <td>奈何桥上。                                         ...</td>\n",
       "      <td>HUAWEI Mate 10 Pro</td>\n",
       "      <td></td>\n",
       "      <td>188</td>\n",
       "      <td>62</td>\n",
       "      <td>m</td>\n",
       "      <td>5621943005</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>https://m.weibo.cn/u/5621943005?uid=5621943005</td>\n",
       "      <td>https://tvax1.sinaimg.cn/crop.0.0.664.664.180/...</td>\n",
       "      <td>HaKuNa玛挞挞</td>\n",
       "      <td>62</td>\n",
       "      <td>9</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       attitudes_count  comments_count  reposts_count               mid  \\\n",
       "58187                0               0              0  4362908001173820   \n",
       "10835                0               0              0  4362811440290233   \n",
       "12116                0               0              0  4362800471640602   \n",
       "107                  0               0              0  4362776065372823   \n",
       "41846                1               0              0  4362810878222659   \n",
       "\n",
       "                                                raw_text              source  \\\n",
       "58187                             吴亦凡对不起[二哈][二哈][二哈][二哈]    前后2000万 OPPO R11   \n",
       "10835                                      竟然很好听？😂 😂 不错哦      OPPO R11s Plus   \n",
       "12116                                              吴亦凡牛逼          Android客户端   \n",
       "107                                     今天我就是51粉丝了[doge]          HUAWEI P20   \n",
       "41846  奈何桥上。                                         ...  HUAWEI Mate 10 Pro   \n",
       "\n",
       "         user.description  user.follow_count  user.followers_count  \\\n",
       "58187       喝了王老吉成为基佬王的男人                569                   165   \n",
       "10835  陪在身边才算拥有，爱到习惯才叫长久。                297                   100   \n",
       "12116                                      0                     1   \n",
       "107          到底我也是个高傲的成年…                369                   248   \n",
       "41846                                    188                    62   \n",
       "\n",
       "      user.gender     user.id  user.mbrank  user.mbtype  \\\n",
       "58187           m  3607455341            0            0   \n",
       "10835           m  5628399052            0            0   \n",
       "12116           m  6721476489            0            0   \n",
       "107             m  5171942624            0            0   \n",
       "41846           m  5621943005            1           11   \n",
       "\n",
       "                                     user.profile_url  \\\n",
       "58187  https://m.weibo.cn/u/3607455341?uid=3607455341   \n",
       "10835  https://m.weibo.cn/u/5628399052?uid=5628399052   \n",
       "12116  https://m.weibo.cn/u/6721476489?uid=6721476489   \n",
       "107    https://m.weibo.cn/u/5171942624?uid=5171942624   \n",
       "41846  https://m.weibo.cn/u/5621943005?uid=5621943005   \n",
       "\n",
       "                                  user.profile_image_url user.screen_name  \\\n",
       "58187  https://tvax1.sinaimg.cn/crop.0.0.996.996.180/...             香香鸡啊   \n",
       "10835  https://tvax1.sinaimg.cn/crop.0.0.996.996.180/...        今年要更加努力xx   \n",
       "12116  https://tvax2.sinaimg.cn/default/images/defaul...     用户6721476489   \n",
       "107    https://tvax4.sinaimg.cn/crop.0.0.996.996.180/...         小羊的百事快落水   \n",
       "41846  https://tvax1.sinaimg.cn/crop.0.0.664.664.180/...        HaKuNa玛挞挞   \n",
       "\n",
       "       user.statuses_count  user.urank  user.verified user.verified_reason  \n",
       "58187                   48          14          False                  NaN  \n",
       "10835                  764          14          False                  NaN  \n",
       "12116                    5           2          False                  NaN  \n",
       "107                   1054          31          False                  NaN  \n",
       "41846                   62           9          False                  NaN  "
      ]
     },
     "execution_count": 473,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[data['user.gender']=='m'].sample(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>attitudes_count</th>\n",
       "      <th>comments_count</th>\n",
       "      <th>reposts_count</th>\n",
       "      <th>mid</th>\n",
       "      <th>raw_text</th>\n",
       "      <th>source</th>\n",
       "      <th>user.description</th>\n",
       "      <th>user.follow_count</th>\n",
       "      <th>user.followers_count</th>\n",
       "      <th>user.gender</th>\n",
       "      <th>user.id</th>\n",
       "      <th>user.mbrank</th>\n",
       "      <th>user.mbtype</th>\n",
       "      <th>user.profile_url</th>\n",
       "      <th>user.profile_image_url</th>\n",
       "      <th>user.screen_name</th>\n",
       "      <th>user.statuses_count</th>\n",
       "      <th>user.urank</th>\n",
       "      <th>user.verified</th>\n",
       "      <th>user.verified_reason</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10715</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4362800564822206</td>\n",
       "      <td>吴亦凡牛逼</td>\n",
       "      <td>Android客户端</td>\n",
       "      <td></td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>m</td>\n",
       "      <td>6693614926</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>https://m.weibo.cn/u/6693614926?uid=6693614926</td>\n",
       "      <td>https://tvax3.sinaimg.cn/default/images/defaul...</td>\n",
       "      <td>用户6693614926</td>\n",
       "      <td>40</td>\n",
       "      <td>4</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46174</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4362800585122065</td>\n",
       "      <td>吴亦凡牛逼</td>\n",
       "      <td>Android客户端</td>\n",
       "      <td></td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>f</td>\n",
       "      <td>6693651578</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>https://m.weibo.cn/u/6693651578?uid=6693651578</td>\n",
       "      <td>https://tvax3.sinaimg.cn/default/images/defaul...</td>\n",
       "      <td>用户6693651578</td>\n",
       "      <td>39</td>\n",
       "      <td>4</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14330</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4362806692434481</td>\n",
       "      <td>转发微博</td>\n",
       "      <td>OPPO智能手机</td>\n",
       "      <td></td>\n",
       "      <td>83</td>\n",
       "      <td>2</td>\n",
       "      <td>f</td>\n",
       "      <td>6981995378</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>https://m.weibo.cn/u/6981995378?uid=6981995378</td>\n",
       "      <td>https://tvax1.sinaimg.cn/crop.0.0.100.100.180/...</td>\n",
       "      <td>激奋_欧耶</td>\n",
       "      <td>11</td>\n",
       "      <td>4</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67636</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4362925572072904</td>\n",
       "      <td>转发微博</td>\n",
       "      <td>iPhone客户端</td>\n",
       "      <td></td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>m</td>\n",
       "      <td>6580608026</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>https://m.weibo.cn/u/6580608026?uid=6580608026</td>\n",
       "      <td>https://tvax4.sinaimg.cn/crop.0.0.1125.1125.18...</td>\n",
       "      <td>LiZyuuu</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33961</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4362800232918724</td>\n",
       "      <td>吴亦凡牛逼</td>\n",
       "      <td>Android客户端</td>\n",
       "      <td></td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>m</td>\n",
       "      <td>6693960080</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>https://m.weibo.cn/u/6693960080?uid=6693960080</td>\n",
       "      <td>https://tvax1.sinaimg.cn/default/images/defaul...</td>\n",
       "      <td>用户6693960080</td>\n",
       "      <td>38</td>\n",
       "      <td>4</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       attitudes_count  comments_count  reposts_count               mid  \\\n",
       "10715                0               0              0  4362800564822206   \n",
       "46174                0               0              0  4362800585122065   \n",
       "14330                0               0              0  4362806692434481   \n",
       "67636                0               0              0  4362925572072904   \n",
       "33961                0               0              0  4362800232918724   \n",
       "\n",
       "      raw_text      source user.description  user.follow_count  \\\n",
       "10715    吴亦凡牛逼  Android客户端                                   0   \n",
       "46174    吴亦凡牛逼  Android客户端                                   0   \n",
       "14330     转发微博    OPPO智能手机                                  83   \n",
       "67636     转发微博   iPhone客户端                                   2   \n",
       "33961    吴亦凡牛逼  Android客户端                                   0   \n",
       "\n",
       "       user.followers_count user.gender     user.id  user.mbrank  user.mbtype  \\\n",
       "10715                     1           m  6693614926            0            0   \n",
       "46174                     1           f  6693651578            0            0   \n",
       "14330                     2           f  6981995378            0            0   \n",
       "67636                     2           m  6580608026            0            0   \n",
       "33961                     1           m  6693960080            0            0   \n",
       "\n",
       "                                     user.profile_url  \\\n",
       "10715  https://m.weibo.cn/u/6693614926?uid=6693614926   \n",
       "46174  https://m.weibo.cn/u/6693651578?uid=6693651578   \n",
       "14330  https://m.weibo.cn/u/6981995378?uid=6981995378   \n",
       "67636  https://m.weibo.cn/u/6580608026?uid=6580608026   \n",
       "33961  https://m.weibo.cn/u/6693960080?uid=6693960080   \n",
       "\n",
       "                                  user.profile_image_url user.screen_name  \\\n",
       "10715  https://tvax3.sinaimg.cn/default/images/defaul...     用户6693614926   \n",
       "46174  https://tvax3.sinaimg.cn/default/images/defaul...     用户6693651578   \n",
       "14330  https://tvax1.sinaimg.cn/crop.0.0.100.100.180/...            激奋_欧耶   \n",
       "67636  https://tvax4.sinaimg.cn/crop.0.0.1125.1125.18...          LiZyuuu   \n",
       "33961  https://tvax1.sinaimg.cn/default/images/defaul...     用户6693960080   \n",
       "\n",
       "       user.statuses_count  user.urank  user.verified user.verified_reason  \n",
       "10715                   40           4          False                  NaN  \n",
       "46174                   39           4          False                  NaN  \n",
       "14330                   11           4          False                  NaN  \n",
       "67636                    2           3          False                  NaN  \n",
       "33961                   38           4          False                  NaN  "
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_fake = data[((data['user.follow_count']<=5)|(data['user.followers_count']<=5))&\n",
    "                 (data['user.description']=='')&\n",
    "                 (data['comments_count']==0)&\n",
    "                (data['attitudes_count']==0)&\n",
    "                (data['reposts_count']==0)&\n",
    "                (data['user.mbrank']==0)]\n",
    "data_fake.sample(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(5667, 20)"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_fake.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 昵称里包含“用户”的，基本上可以断定是假粉丝\n",
    "data_fake2_index = data[(data['user.follow_count']>5)&\n",
    "                        (data['user.followers_count']>5)&\n",
    "                        (data['user.screen_name'].str.contains('用户'))].index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 把假的流量粉丝转发组合起来\n",
    "data_fake = pd.concat([data_fake, data.iloc[data_fake2_index]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(6100, 20)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_fake.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 取出真粉的转发\n",
    "data_true = data.drop(data_fake.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(96018, 20)"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_true.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "真粉丝转发数占总转发数的94.03%\n",
      "假粉丝转发数占总转发数的5.97%\n"
     ]
    }
   ],
   "source": [
    "print('真粉丝转发数占总转发数的{}%'.format(np.round(data_true.shape[0]/data.shape[0]*100, 2)))\n",
    "print('假粉丝转发数占总转发数的{}%'.format(np.round(data_fake.shape[0]/data.shape[0]*100, 2)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<script>\n",
       "    require.config({\n",
       "        paths: {\n",
       "            'echarts': '/nbextensions/echarts/echarts.min'\n",
       "        }\n",
       "    });\n",
       "</script>\n",
       "    <div id=\"880eeb900ac346bf995165dde256052c\" style=\"width:600px;height:500px;\"></div>\n",
       "\n",
       "\n",
       "<script>\n",
       "    require(['echarts'], function(echarts) {\n",
       "        \n",
       "var myChart_880eeb900ac346bf995165dde256052c = echarts.init(document.getElementById('880eeb900ac346bf995165dde256052c'), null, {renderer: 'canvas'});\n",
       "var option_880eeb900ac346bf995165dde256052c = {\n",
       "    \"title\": [\n",
       "        {\n",
       "            \"text\": \"\\u5434\\u4ea6\\u51e1\\u771f\\u5047\\u6d41\\u91cf\\u7684\\u8f6c\\u53d1\\u91cf\",\n",
       "            \"subtext\": \"\",\n",
       "            \"left\": \"auto\",\n",
       "            \"top\": \"auto\",\n",
       "            \"textStyle\": {\n",
       "                \"color\": \"#000\",\n",
       "                \"fontSize\": 18\n",
       "            },\n",
       "            \"subtextStyle\": {\n",
       "                \"color\": \"#aaa\",\n",
       "                \"fontSize\": 12\n",
       "            }\n",
       "        }\n",
       "    ],\n",
       "    \"toolbox\": {\n",
       "        \"show\": true,\n",
       "        \"orient\": \"vertical\",\n",
       "        \"left\": \"95%\",\n",
       "        \"top\": \"center\",\n",
       "        \"feature\": {\n",
       "            \"saveAsImage\": {\n",
       "                \"show\": true,\n",
       "                \"title\": \"\\u4e0b\\u8f7d\\u56fe\\u7247\"\n",
       "            },\n",
       "            \"restore\": {\n",
       "                \"show\": true\n",
       "            },\n",
       "            \"dataView\": {\n",
       "                \"show\": true\n",
       "            }\n",
       "        }\n",
       "    },\n",
       "    \"series_id\": 1201124,\n",
       "    \"tooltip\": {\n",
       "        \"trigger\": \"item\",\n",
       "        \"triggerOn\": \"mousemove|click\",\n",
       "        \"axisPointer\": {\n",
       "            \"type\": \"line\"\n",
       "        },\n",
       "        \"formatter\": null,\n",
       "        \"textStyle\": {\n",
       "            \"color\": \"#fff\",\n",
       "            \"fontSize\": 14\n",
       "        },\n",
       "        \"backgroundColor\": \"rgba(50,50,50,0.7)\",\n",
       "        \"borderColor\": \"#333\",\n",
       "        \"borderWidth\": 0\n",
       "    },\n",
       "    \"series\": [\n",
       "        {\n",
       "            \"type\": \"bar\",\n",
       "            \"name\": \"(\\u603b\\u6570\\u636e102118\\u6761)\",\n",
       "            \"data\": [\n",
       "                102118,\n",
       "                6100,\n",
       "                96018\n",
       "            ],\n",
       "            \"stack\": \"stack_1201124\",\n",
       "            \"barCategoryGap\": \"20%\",\n",
       "            \"label\": {\n",
       "                \"normal\": {\n",
       "                    \"show\": true,\n",
       "                    \"position\": \"top\",\n",
       "                    \"textStyle\": {\n",
       "                        \"color\": \"#000\",\n",
       "                        \"fontSize\": 12\n",
       "                    },\n",
       "                    \"formatter\": null\n",
       "                },\n",
       "                \"emphasis\": {\n",
       "                    \"show\": true,\n",
       "                    \"position\": null,\n",
       "                    \"textStyle\": {\n",
       "                        \"color\": \"#fff\",\n",
       "                        \"fontSize\": 12\n",
       "                    }\n",
       "                }\n",
       "            },\n",
       "            \"markPoint\": {\n",
       "                \"data\": []\n",
       "            },\n",
       "            \"markLine\": {\n",
       "                \"data\": []\n",
       "            },\n",
       "            \"seriesId\": 1201124\n",
       "        }\n",
       "    ],\n",
       "    \"legend\": [\n",
       "        {\n",
       "            \"data\": [\n",
       "                \"(\\u603b\\u6570\\u636e102118\\u6761)\"\n",
       "            ],\n",
       "            \"selectedMode\": \"multiple\",\n",
       "            \"show\": true,\n",
       "            \"left\": \"center\",\n",
       "            \"top\": \"top\",\n",
       "            \"orient\": \"horizontal\",\n",
       "            \"textStyle\": {\n",
       "                \"fontSize\": 12,\n",
       "                \"color\": \"#333\"\n",
       "            }\n",
       "        }\n",
       "    ],\n",
       "    \"backgroundColor\": \"#fff\",\n",
       "    \"xAxis\": [\n",
       "        {\n",
       "            \"name\": \"\",\n",
       "            \"show\": true,\n",
       "            \"nameLocation\": \"middle\",\n",
       "            \"nameGap\": 25,\n",
       "            \"nameTextStyle\": {\n",
       "                \"fontSize\": 14\n",
       "            },\n",
       "            \"axisLabel\": {\n",
       "                \"interval\": \"auto\",\n",
       "                \"rotate\": 0,\n",
       "                \"margin\": 8,\n",
       "                \"textStyle\": {\n",
       "                    \"fontSize\": 20,\n",
       "                    \"color\": \"#000\"\n",
       "                }\n",
       "            },\n",
       "            \"axisTick\": {\n",
       "                \"alignWithLabel\": false\n",
       "            },\n",
       "            \"inverse\": false,\n",
       "            \"position\": null,\n",
       "            \"boundaryGap\": true,\n",
       "            \"min\": null,\n",
       "            \"max\": null,\n",
       "            \"data\": [\n",
       "                \"\\u603b\\u8f6c\\u53d1\\u91cf\",\n",
       "                \"\\u5047\\u7c89\\u4e1d\\u8f6c\\u53d1\\u91cf\",\n",
       "                \"\\u771f\\u7c89\\u4e1d\\u8f6c\\u53d1\\u91cf\"\n",
       "            ],\n",
       "            \"type\": \"category\"\n",
       "        }\n",
       "    ],\n",
       "    \"yAxis\": [\n",
       "        {\n",
       "            \"name\": \"\",\n",
       "            \"show\": true,\n",
       "            \"nameLocation\": \"middle\",\n",
       "            \"nameGap\": 25,\n",
       "            \"nameTextStyle\": {\n",
       "                \"fontSize\": 14\n",
       "            },\n",
       "            \"axisLabel\": {\n",
       "                \"formatter\": \"{value} \",\n",
       "                \"rotate\": 0,\n",
       "                \"interval\": \"auto\",\n",
       "                \"margin\": 8,\n",
       "                \"textStyle\": {\n",
       "                    \"fontSize\": 14,\n",
       "                    \"color\": \"#000\"\n",
       "                }\n",
       "            },\n",
       "            \"axisTick\": {\n",
       "                \"alignWithLabel\": false\n",
       "            },\n",
       "            \"inverse\": false,\n",
       "            \"position\": null,\n",
       "            \"boundaryGap\": true,\n",
       "            \"min\": null,\n",
       "            \"max\": null,\n",
       "            \"splitLine\": {\n",
       "                \"show\": true\n",
       "            },\n",
       "            \"type\": \"value\"\n",
       "        }\n",
       "    ],\n",
       "    \"color\": [\n",
       "        \"#c23531\",\n",
       "        \"#2f4554\",\n",
       "        \"#61a0a8\",\n",
       "        \"#d48265\",\n",
       "        \"#749f83\",\n",
       "        \"#ca8622\",\n",
       "        \"#bda29a\",\n",
       "        \"#6e7074\",\n",
       "        \"#546570\",\n",
       "        \"#c4ccd3\",\n",
       "        \"#f05b72\",\n",
       "        \"#ef5b9c\",\n",
       "        \"#f47920\",\n",
       "        \"#905a3d\",\n",
       "        \"#fab27b\",\n",
       "        \"#2a5caa\",\n",
       "        \"#444693\",\n",
       "        \"#726930\",\n",
       "        \"#b2d235\",\n",
       "        \"#6d8346\",\n",
       "        \"#ac6767\",\n",
       "        \"#1d953f\",\n",
       "        \"#6950a1\",\n",
       "        \"#918597\",\n",
       "        \"#f6f5ec\"\n",
       "    ]\n",
       "};\n",
       "myChart_880eeb900ac346bf995165dde256052c.setOption(option_880eeb900ac346bf995165dde256052c);\n",
       "\n",
       "    });\n",
       "</script>\n"
      ],
      "text/plain": [
       "<pyecharts.charts.bar.Bar at 0x106718278>"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bar = Bar(\"吴亦凡真假流量的转发量\", width = 600,height=500)\n",
    "bar.add(\"(总数据102118条)\", ['总转发量', '假粉丝转发量', '真粉丝转发量'], \n",
    "        [data.shape[0], data_fake.shape[0], data_true.shape[0]], is_stack=True, \n",
    "       xaxis_label_textsize=20, yaxis_label_textsize=14, is_label_show=True)\n",
    "bar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "real_fans_num = data_true.drop_duplicates(subset='user.id').shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<script>\n",
       "    require.config({\n",
       "        paths: {\n",
       "            'echarts': '/nbextensions/echarts/echarts.min'\n",
       "        }\n",
       "    });\n",
       "</script>\n",
       "    <div id=\"5c7e94540bf040548d603670c24d31df\" style=\"width:600px;height:500px;\"></div>\n",
       "\n",
       "\n",
       "<script>\n",
       "    require(['echarts'], function(echarts) {\n",
       "        \n",
       "var myChart_5c7e94540bf040548d603670c24d31df = echarts.init(document.getElementById('5c7e94540bf040548d603670c24d31df'), null, {renderer: 'canvas'});\n",
       "var option_5c7e94540bf040548d603670c24d31df = {\n",
       "    \"title\": [\n",
       "        {\n",
       "            \"text\": \"\\u5434\\u4ea6\\u51e1\\u771f\\u5047\\u6d41\\u91cf\\u7684\\u8f6c\\u53d1\\u91cf\\u4e0e\\u771f\\u5b9e\\u8f6c\\u53d1\\u7c89\\u4e1d\\u91cf(\\u603b\\u6570\\u636e102118\\u6761)\",\n",
       "            \"subtext\": \"\",\n",
       "            \"left\": \"auto\",\n",
       "            \"top\": \"auto\",\n",
       "            \"textStyle\": {\n",
       "                \"color\": \"#000\",\n",
       "                \"fontSize\": 18\n",
       "            },\n",
       "            \"subtextStyle\": {\n",
       "                \"color\": \"#aaa\",\n",
       "                \"fontSize\": 12\n",
       "            }\n",
       "        }\n",
       "    ],\n",
       "    \"toolbox\": {\n",
       "        \"show\": true,\n",
       "        \"orient\": \"vertical\",\n",
       "        \"left\": \"95%\",\n",
       "        \"top\": \"center\",\n",
       "        \"feature\": {\n",
       "            \"saveAsImage\": {\n",
       "                \"show\": true,\n",
       "                \"title\": \"\\u4e0b\\u8f7d\\u56fe\\u7247\"\n",
       "            },\n",
       "            \"restore\": {\n",
       "                \"show\": true\n",
       "            },\n",
       "            \"dataView\": {\n",
       "                \"show\": true\n",
       "            }\n",
       "        }\n",
       "    },\n",
       "    \"series_id\": 1874947,\n",
       "    \"tooltip\": {\n",
       "        \"trigger\": \"item\",\n",
       "        \"triggerOn\": \"mousemove|click\",\n",
       "        \"axisPointer\": {\n",
       "            \"type\": \"line\"\n",
       "        },\n",
       "        \"formatter\": null,\n",
       "        \"textStyle\": {\n",
       "            \"color\": \"#fff\",\n",
       "            \"fontSize\": 14\n",
       "        },\n",
       "        \"backgroundColor\": \"rgba(50,50,50,0.7)\",\n",
       "        \"borderColor\": \"#333\",\n",
       "        \"borderWidth\": 0\n",
       "    },\n",
       "    \"series\": [\n",
       "        {\n",
       "            \"type\": \"bar\",\n",
       "            \"name\": \"\",\n",
       "            \"data\": [\n",
       "                102118,\n",
       "                6100,\n",
       "                96018,\n",
       "                81872\n",
       "            ],\n",
       "            \"stack\": \"stack_1874947\",\n",
       "            \"barCategoryGap\": \"20%\",\n",
       "            \"label\": {\n",
       "                \"normal\": {\n",
       "                    \"show\": true,\n",
       "                    \"position\": \"top\",\n",
       "                    \"textStyle\": {\n",
       "                        \"color\": \"#000\",\n",
       "                        \"fontSize\": 12\n",
       "                    },\n",
       "                    \"formatter\": null\n",
       "                },\n",
       "                \"emphasis\": {\n",
       "                    \"show\": true,\n",
       "                    \"position\": null,\n",
       "                    \"textStyle\": {\n",
       "                        \"color\": \"#fff\",\n",
       "                        \"fontSize\": 12\n",
       "                    }\n",
       "                }\n",
       "            },\n",
       "            \"markPoint\": {\n",
       "                \"data\": []\n",
       "            },\n",
       "            \"markLine\": {\n",
       "                \"data\": []\n",
       "            },\n",
       "            \"seriesId\": 1874947\n",
       "        }\n",
       "    ],\n",
       "    \"legend\": [\n",
       "        {\n",
       "            \"data\": [\n",
       "                \"\"\n",
       "            ],\n",
       "            \"selectedMode\": \"multiple\",\n",
       "            \"show\": true,\n",
       "            \"left\": \"center\",\n",
       "            \"top\": \"top\",\n",
       "            \"orient\": \"horizontal\",\n",
       "            \"textStyle\": {\n",
       "                \"fontSize\": 12,\n",
       "                \"color\": \"#333\"\n",
       "            }\n",
       "        }\n",
       "    ],\n",
       "    \"backgroundColor\": \"#fff\",\n",
       "    \"xAxis\": [\n",
       "        {\n",
       "            \"name\": \"\",\n",
       "            \"show\": true,\n",
       "            \"nameLocation\": \"middle\",\n",
       "            \"nameGap\": 25,\n",
       "            \"nameTextStyle\": {\n",
       "                \"fontSize\": 14\n",
       "            },\n",
       "            \"axisLabel\": {\n",
       "                \"interval\": \"auto\",\n",
       "                \"rotate\": 20,\n",
       "                \"margin\": 8,\n",
       "                \"textStyle\": {\n",
       "                    \"fontSize\": 20,\n",
       "                    \"color\": \"#000\"\n",
       "                }\n",
       "            },\n",
       "            \"axisTick\": {\n",
       "                \"alignWithLabel\": false\n",
       "            },\n",
       "            \"inverse\": false,\n",
       "            \"position\": null,\n",
       "            \"boundaryGap\": true,\n",
       "            \"min\": null,\n",
       "            \"max\": null,\n",
       "            \"data\": [\n",
       "                \"\\u603b\\u8f6c\\u53d1\\u91cf\",\n",
       "                \"\\u5047\\u7c89\\u4e1d\\u8f6c\\u53d1\\u91cf\",\n",
       "                \"\\u771f\\u7c89\\u4e1d\\u8f6c\\u53d1\\u91cf\",\n",
       "                \"\\u771f\\u5b9e\\u8f6c\\u53d1\\u7c89\\u4e1d\\u91cf\"\n",
       "            ],\n",
       "            \"type\": \"category\"\n",
       "        }\n",
       "    ],\n",
       "    \"yAxis\": [\n",
       "        {\n",
       "            \"name\": \"\",\n",
       "            \"show\": true,\n",
       "            \"nameLocation\": \"middle\",\n",
       "            \"nameGap\": 25,\n",
       "            \"nameTextStyle\": {\n",
       "                \"fontSize\": 14\n",
       "            },\n",
       "            \"axisLabel\": {\n",
       "                \"formatter\": \"{value} \",\n",
       "                \"rotate\": 0,\n",
       "                \"interval\": \"auto\",\n",
       "                \"margin\": 8,\n",
       "                \"textStyle\": {\n",
       "                    \"fontSize\": 14,\n",
       "                    \"color\": \"#000\"\n",
       "                }\n",
       "            },\n",
       "            \"axisTick\": {\n",
       "                \"alignWithLabel\": false\n",
       "            },\n",
       "            \"inverse\": false,\n",
       "            \"position\": null,\n",
       "            \"boundaryGap\": true,\n",
       "            \"min\": null,\n",
       "            \"max\": null,\n",
       "            \"splitLine\": {\n",
       "                \"show\": true\n",
       "            },\n",
       "            \"type\": \"value\"\n",
       "        }\n",
       "    ],\n",
       "    \"color\": [\n",
       "        \"#c23531\",\n",
       "        \"#2f4554\",\n",
       "        \"#61a0a8\",\n",
       "        \"#d48265\",\n",
       "        \"#749f83\",\n",
       "        \"#ca8622\",\n",
       "        \"#bda29a\",\n",
       "        \"#6e7074\",\n",
       "        \"#546570\",\n",
       "        \"#c4ccd3\",\n",
       "        \"#f05b72\",\n",
       "        \"#ef5b9c\",\n",
       "        \"#f47920\",\n",
       "        \"#905a3d\",\n",
       "        \"#fab27b\",\n",
       "        \"#2a5caa\",\n",
       "        \"#444693\",\n",
       "        \"#726930\",\n",
       "        \"#b2d235\",\n",
       "        \"#6d8346\",\n",
       "        \"#ac6767\",\n",
       "        \"#1d953f\",\n",
       "        \"#6950a1\",\n",
       "        \"#918597\",\n",
       "        \"#f6f5ec\"\n",
       "    ]\n",
       "};\n",
       "myChart_5c7e94540bf040548d603670c24d31df.setOption(option_5c7e94540bf040548d603670c24d31df);\n",
       "\n",
       "    });\n",
       "</script>\n"
      ],
      "text/plain": [
       "<pyecharts.charts.bar.Bar at 0x106718cf8>"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bar = Bar(\"吴亦凡真假流量的转发量与真实转发粉丝量(总数据102118条)\", width = 600,height=500)\n",
    "bar.add('', ['总转发量', '假粉丝转发量', '真粉丝转发量', '真实转发粉丝量'], \n",
    "        [data.shape[0], data_fake.shape[0], data_true.shape[0], real_fans_num], is_stack=True, \n",
    "       xaxis_label_textsize=20, yaxis_label_textsize=14, is_label_show=True, xaxis_rotate=20)\n",
    "bar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "真实转发粉丝量占总转发数的80.17%\n"
     ]
    }
   ],
   "source": [
    "print('真实转发粉丝量占总转发数的{}%'.format(np.round(real_fans_num/data.shape[0]*100, 2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. 大家对于《大碗宽面》怎么看？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 102118 entries, 0 to 102117\n",
      "Data columns (total 20 columns):\n",
      "attitudes_count           102118 non-null int64\n",
      "comments_count            102118 non-null int64\n",
      "reposts_count             102118 non-null int64\n",
      "mid                       102118 non-null object\n",
      "raw_text                  102118 non-null object\n",
      "source                    102118 non-null object\n",
      "user.description          102118 non-null object\n",
      "user.follow_count         102118 non-null int64\n",
      "user.followers_count      102118 non-null int64\n",
      "user.gender               102118 non-null object\n",
      "user.id                   102118 non-null int64\n",
      "user.mbrank               102118 non-null int64\n",
      "user.mbtype               102118 non-null int64\n",
      "user.profile_url          102118 non-null object\n",
      "user.profile_image_url    102118 non-null object\n",
      "user.screen_name          102118 non-null object\n",
      "user.statuses_count       102118 non-null int64\n",
      "user.urank                102118 non-null int64\n",
      "user.verified             102118 non-null bool\n",
      "user.verified_reason      5260 non-null object\n",
      "dtypes: bool(1), int64(10), object(9)\n",
      "memory usage: 14.9+ MB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "from snownlp import SnowNLP\n",
    "\n",
    "\n",
    "def get_sent_snownlp(data):\n",
    "    s = SnowNLP(data)\n",
    "    return s.sentiments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_true['clean_text'] = data_true['raw_text'].str.split(\"//\", expand=True)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_true['sent_nlp'] = data_true.loc[data_true['clean_text']!='', 'clean_text'].apply(get_sent_snownlp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 433,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>clean_text</th>\n",
       "      <th>sent_nlp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>30370</th>\n",
       "      <td>哈哈哈哈，第一次转发吴亦凡的微博，之前都是在鬼畜区见到，莫名觉得好听</td>\n",
       "      <td>0.967346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>91789</th>\n",
       "      <td>我觉得不错啊？</td>\n",
       "      <td>0.861213</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79664</th>\n",
       "      <td>转发微博</td>\n",
       "      <td>0.643891</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>82108</th>\n",
       "      <td>你看这个面它又长又宽，你看这个碗它又大又圆[允悲] mv好Q啊</td>\n",
       "      <td>0.903441</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46573</th>\n",
       "      <td>我晕我现在心情真的好复杂</td>\n",
       "      <td>0.111104</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                               clean_text  sent_nlp\n",
       "30370  哈哈哈哈，第一次转发吴亦凡的微博，之前都是在鬼畜区见到，莫名觉得好听  0.967346\n",
       "91789                             我觉得不错啊？  0.861213\n",
       "79664                                转发微博  0.643891\n",
       "82108     你看这个面它又长又宽，你看这个碗它又大又圆[允悲] mv好Q啊  0.903441\n",
       "46573                        我晕我现在心情真的好复杂  0.111104"
      ]
     },
     "execution_count": 433,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_true[['clean_text', 'sent_nlp']].sample(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.6860448043677209"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_true.loc[-data_true['clean_text'].isin(['转发微博', 'repost', '轉發微博']), 'sent_nlp'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<script>\n",
       "    require.config({\n",
       "        paths: {\n",
       "            'echarts': '/nbextensions/echarts/echarts.min'\n",
       "        }\n",
       "    });\n",
       "</script>\n",
       "    <div id=\"40fd7313e21a4b9392be913ef1131e95\" style=\"width:800px;height:400px;\"></div>\n",
       "\n",
       "\n",
       "<script>\n",
       "    require(['echarts'], function(echarts) {\n",
       "        \n",
       "var myChart_40fd7313e21a4b9392be913ef1131e95 = echarts.init(document.getElementById('40fd7313e21a4b9392be913ef1131e95'), null, {renderer: 'canvas'});\n",
       "var option_40fd7313e21a4b9392be913ef1131e95 = {\n",
       "    \"title\": [\n",
       "        {\n",
       "            \"text\": \"\",\n",
       "            \"subtext\": \"\",\n",
       "            \"left\": \"auto\",\n",
       "            \"top\": \"auto\",\n",
       "            \"textStyle\": {\n",
       "                \"color\": \"#000\",\n",
       "                \"fontSize\": 18\n",
       "            },\n",
       "            \"subtextStyle\": {\n",
       "                \"color\": \"#aaa\",\n",
       "                \"fontSize\": 12\n",
       "            }\n",
       "        }\n",
       "    ],\n",
       "    \"toolbox\": {\n",
       "        \"show\": true,\n",
       "        \"orient\": \"vertical\",\n",
       "        \"left\": \"95%\",\n",
       "        \"top\": \"center\",\n",
       "        \"feature\": {\n",
       "            \"saveAsImage\": {\n",
       "                \"show\": true,\n",
       "                \"title\": \"\\u4e0b\\u8f7d\\u56fe\\u7247\"\n",
       "            },\n",
       "            \"restore\": {\n",
       "                \"show\": true\n",
       "            },\n",
       "            \"dataView\": {\n",
       "                \"show\": true\n",
       "            }\n",
       "        }\n",
       "    },\n",
       "    \"series_id\": 2798101,\n",
       "    \"tooltip\": {\n",
       "        \"trigger\": \"item\",\n",
       "        \"triggerOn\": \"mousemove|click\",\n",
       "        \"axisPointer\": {\n",
       "            \"type\": \"line\"\n",
       "        },\n",
       "        \"formatter\": \"{a} <br/>{b} : {c}%\",\n",
       "        \"textStyle\": {\n",
       "            \"color\": \"#fff\",\n",
       "            \"fontSize\": 14\n",
       "        },\n",
       "        \"backgroundColor\": \"rgba(50,50,50,0.7)\",\n",
       "        \"borderColor\": \"#333\",\n",
       "        \"borderWidth\": 0\n",
       "    },\n",
       "    \"series\": [\n",
       "        {\n",
       "            \"type\": \"gauge\",\n",
       "            \"detail\": {\n",
       "                \"formatter\": \"{value}%\"\n",
       "            },\n",
       "            \"name\": \"\",\n",
       "            \"min\": 0,\n",
       "            \"max\": 100,\n",
       "            \"startAngle\": 225,\n",
       "            \"endAngle\": -45,\n",
       "            \"data\": [\n",
       "                {\n",
       "                    \"value\": [\n",
       "                        68.6\n",
       "                    ],\n",
       "                    \"name\": [\n",
       "                        \"\\u8bc4\\u8bba\\u5bf9\\u300a\\u5927\\u7897\\u5bbd\\u9762\\u300b\\n\\u7684\\u5e73\\u5747\\u8bc4\\u5206\"\n",
       "                    ]\n",
       "                }\n",
       "            ]\n",
       "        }\n",
       "    ],\n",
       "    \"legend\": [\n",
       "        {\n",
       "            \"data\": [\n",
       "                \"\"\n",
       "            ],\n",
       "            \"selectedMode\": \"multiple\",\n",
       "            \"show\": true,\n",
       "            \"left\": \"center\",\n",
       "            \"top\": \"top\",\n",
       "            \"orient\": \"horizontal\",\n",
       "            \"textStyle\": {\n",
       "                \"fontSize\": 12,\n",
       "                \"color\": \"#333\"\n",
       "            }\n",
       "        }\n",
       "    ],\n",
       "    \"backgroundColor\": \"#fff\",\n",
       "    \"color\": [\n",
       "        \"#c23531\",\n",
       "        \"#2f4554\",\n",
       "        \"#61a0a8\",\n",
       "        \"#d48265\",\n",
       "        \"#749f83\",\n",
       "        \"#ca8622\",\n",
       "        \"#bda29a\",\n",
       "        \"#6e7074\",\n",
       "        \"#546570\",\n",
       "        \"#c4ccd3\",\n",
       "        \"#f05b72\",\n",
       "        \"#ef5b9c\",\n",
       "        \"#f47920\",\n",
       "        \"#905a3d\",\n",
       "        \"#fab27b\",\n",
       "        \"#2a5caa\",\n",
       "        \"#444693\",\n",
       "        \"#726930\",\n",
       "        \"#b2d235\",\n",
       "        \"#6d8346\",\n",
       "        \"#ac6767\",\n",
       "        \"#1d953f\",\n",
       "        \"#6950a1\",\n",
       "        \"#918597\",\n",
       "        \"#f6f5ec\"\n",
       "    ]\n",
       "};\n",
       "myChart_40fd7313e21a4b9392be913ef1131e95.setOption(option_40fd7313e21a4b9392be913ef1131e95);\n",
       "\n",
       "    });\n",
       "</script>\n"
      ],
      "text/plain": [
       "<pyecharts.charts.gauge.Gauge at 0x128af5e80>"
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pyecharts import Gauge, Page\n",
    "\n",
    "\n",
    "g = Gauge()\n",
    "g.add('', ['评论对《大碗宽面》\\n的平均评分'], [68.6])\n",
    "g"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>clean_text</th>\n",
       "      <th>sent_nlp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>39966</th>\n",
       "      <td>吴亦凡做的太酷了</td>\n",
       "      <td>0.909254</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49314</th>\n",
       "      <td>啊啊啊啊啊啊，牛鹿锁死</td>\n",
       "      <td>0.924473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>82926</th>\n",
       "      <td>讲真，这首歌很有旋律啊[赞][赞][赞]</td>\n",
       "      <td>0.999965</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10412</th>\n",
       "      <td>瑞思拜瑞思拜</td>\n",
       "      <td>0.988395</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3454</th>\n",
       "      <td>很接地气了哈哈哈哈哈</td>\n",
       "      <td>0.972198</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 clean_text  sent_nlp\n",
       "39966              吴亦凡做的太酷了  0.909254\n",
       "49314           啊啊啊啊啊啊，牛鹿锁死  0.924473\n",
       "82926  讲真，这首歌很有旋律啊[赞][赞][赞]  0.999965\n",
       "10412                瑞思拜瑞思拜  0.988395\n",
       "3454             很接地气了哈哈哈哈哈  0.972198"
      ]
     },
     "execution_count": 165,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_true.loc[data_true['sent_nlp']>0.9, ['clean_text', 'sent_nlp']].sample(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user.screen_name</th>\n",
       "      <th>clean_text</th>\n",
       "      <th>attitudes_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>20939</th>\n",
       "      <td>何炅</td>\n",
       "      <td>吴亦凡好有趣一男的。</td>\n",
       "      <td>30891</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>74001</th>\n",
       "      <td>舒淇</td>\n",
       "      <td>肚子暖暖 心就暖 [色][色][色]</td>\n",
       "      <td>29275</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>90481</th>\n",
       "      <td>包贝尔</td>\n",
       "      <td>饿了，你吃啥呢？</td>\n",
       "      <td>14388</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25295</th>\n",
       "      <td>BeatsbyDre</td>\n",
       "      <td>朋友，吃面吗？又长又宽的那种👇</td>\n",
       "      <td>1287</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52621</th>\n",
       "      <td>鹿透社</td>\n",
       "      <td>鹿晗也发歌了，而且还是认真用心做的好歌，希望你朋友圈的微商每次找你转发的时候也能回馈一下，哦...</td>\n",
       "      <td>709</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72937</th>\n",
       "      <td>Clear清扬</td>\n",
       "      <td>清扬能让你头发无懈可击，这也确是我本意！明天的演唱会，我在南京等你！Ah wu ah nah...</td>\n",
       "      <td>556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1410</th>\n",
       "      <td>闫紫境GwAwa</td>\n",
       "      <td>这就是hiphop！Hiphop的精神最重要！[good][good][good]</td>\n",
       "      <td>555</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25552</th>\n",
       "      <td>湖南卫视七十二层奇楼</td>\n",
       "      <td>我听这歌又酷又甜[心] 凡凡  好久不见甚是想念～</td>\n",
       "      <td>549</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19120</th>\n",
       "      <td>限定热狗丨思聪</td>\n",
       "      <td>吃了这碗面🍜我们就是一家人[加油]</td>\n",
       "      <td>418</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20671</th>\n",
       "      <td>西西里岛岛主金女士</td>\n",
       "      <td>哈哈哈哈哈哈哈哈哈他真的好可爱哦！！！！！！！！！</td>\n",
       "      <td>318</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      user.screen_name                                         clean_text  \\\n",
       "20939               何炅                                         吴亦凡好有趣一男的。   \n",
       "74001               舒淇                                 肚子暖暖 心就暖 [色][色][色]   \n",
       "90481              包贝尔                                           饿了，你吃啥呢？   \n",
       "25295       BeatsbyDre                                    朋友，吃面吗？又长又宽的那种👇   \n",
       "52621              鹿透社  鹿晗也发歌了，而且还是认真用心做的好歌，希望你朋友圈的微商每次找你转发的时候也能回馈一下，哦...   \n",
       "72937          Clear清扬  清扬能让你头发无懈可击，这也确是我本意！明天的演唱会，我在南京等你！Ah wu ah nah...   \n",
       "1410          闫紫境GwAwa          这就是hiphop！Hiphop的精神最重要！[good][good][good]   \n",
       "25552       湖南卫视七十二层奇楼                          我听这歌又酷又甜[心] 凡凡  好久不见甚是想念～   \n",
       "19120          限定热狗丨思聪                                  吃了这碗面🍜我们就是一家人[加油]   \n",
       "20671        西西里岛岛主金女士                          哈哈哈哈哈哈哈哈哈他真的好可爱哦！！！！！！！！！   \n",
       "\n",
       "       attitudes_count  \n",
       "20939            30891  \n",
       "74001            29275  \n",
       "90481            14388  \n",
       "25295             1287  \n",
       "52621              709  \n",
       "72937              556  \n",
       "1410               555  \n",
       "25552              549  \n",
       "19120              418  \n",
       "20671              318  "
      ]
     },
     "execution_count": 170,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_true.loc[data_true['attitudes_count'].sort_values(ascending=False)[:10].index, ['user.screen_name', 'clean_text', 'attitudes_count']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user.screen_name</th>\n",
       "      <th>clean_text</th>\n",
       "      <th>attitudes_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>38474</th>\n",
       "      <td>slayerboom</td>\n",
       "      <td>高啊~</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>64727</th>\n",
       "      <td>婕大酱</td>\n",
       "      <td>可可爱爱吴亦凡[嘿哈][嘿哈]</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16831</th>\n",
       "      <td>小精灵real</td>\n",
       "      <td>支持吴老师的娱乐精神</td>\n",
       "      <td>61</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53112</th>\n",
       "      <td>张金堡</td>\n",
       "      <td>你看这个驰，他又帅又俊✌</td>\n",
       "      <td>158</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21270</th>\n",
       "      <td>大葱哥聊KPL</td>\n",
       "      <td>曾经有一碗真挚的宽面放在葱面前，葱没有珍惜，甚至还嗤之以鼻。经历了种种对耳朵的洗礼，我才懂食...</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      user.screen_name                                         clean_text  \\\n",
       "38474       slayerboom                                                高啊~   \n",
       "64727              婕大酱                                    可可爱爱吴亦凡[嘿哈][嘿哈]   \n",
       "16831          小精灵real                                         支持吴老师的娱乐精神   \n",
       "53112              张金堡                                       你看这个驰，他又帅又俊✌   \n",
       "21270          大葱哥聊KPL  曾经有一碗真挚的宽面放在葱面前，葱没有珍惜，甚至还嗤之以鼻。经历了种种对耳朵的洗礼，我才懂食...   \n",
       "\n",
       "       attitudes_count  \n",
       "38474               30  \n",
       "64727               25  \n",
       "16831               61  \n",
       "53112              158  \n",
       "21270               18  "
      ]
     },
     "execution_count": 173,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_true.loc[data_true['attitudes_count'].sort_values(ascending=False)[:100].index, ['user.screen_name', 'clean_text', 'attitudes_count']].sample(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3. 有多少人拿吴亦凡跟蔡徐坤做对比？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 96018 entries, 0 to 102117\n",
      "Data columns (total 22 columns):\n",
      "attitudes_count           96018 non-null int64\n",
      "comments_count            96018 non-null int64\n",
      "reposts_count             96018 non-null int64\n",
      "mid                       96018 non-null object\n",
      "raw_text                  96018 non-null object\n",
      "source                    96018 non-null object\n",
      "user.description          96018 non-null object\n",
      "user.follow_count         96018 non-null int64\n",
      "user.followers_count      96018 non-null int64\n",
      "user.gender               96018 non-null object\n",
      "user.id                   96018 non-null int64\n",
      "user.mbrank               96018 non-null int64\n",
      "user.mbtype               96018 non-null int64\n",
      "user.profile_url          96018 non-null object\n",
      "user.profile_image_url    96018 non-null object\n",
      "user.screen_name          96018 non-null object\n",
      "user.statuses_count       96018 non-null int64\n",
      "user.urank                96018 non-null int64\n",
      "user.verified             96018 non-null bool\n",
      "user.verified_reason      5257 non-null object\n",
      "clean_text                96018 non-null object\n",
      "sent_nlp                  85911 non-null float64\n",
      "dtypes: bool(1), float64(1), int64(10), object(10)\n",
      "memory usage: 18.7+ MB\n"
     ]
    }
   ],
   "source": [
    "data_true.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 290,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "65939                                              \n",
       "18957                            何老师～这个语气好像粉丝说的哈哈哈哈\n",
       "53931                 我家这位兄弟真的是又酷又超级有梗的[喵喵][佩奇][爱你]\n",
       "53496                                     何老师也是可爱的！\n",
       "20893                  这个真的好听。。。我他妈真的，反差太大了，多出这种歌啊！\n",
       "39047                                          转发微博\n",
       "10531                    为什么有了蔡徐坤 感觉吴亦凡没那么讨厌了[doge]\n",
       "53778                                         期待inh\n",
       "3962     想起哥哥在中国新说唱的时候说想把中国风带到rap里，哥哥真的有一直在努力。[好爱哦]\n",
       "74339                         作为吃货团队的一名成员，必须得大碗吃面呀！\n",
       "Name: clean_text, dtype: object"
      ]
     },
     "execution_count": 290,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_true['clean_text'].sample(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 347,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6229"
      ]
     },
     "execution_count": 347,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_true['raw_text'].str.contains('kun|坤|律师|球|函|cxk|比|弟|CXK|胸|格局|气度|衬托').sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 436,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_kun = data_true.loc[data_true['raw_text'].str.contains('kun|坤|律师|球|函|cxk|比|弟|CXK|胸|格局|气度|衬托'),\n",
    "             ['user.screen_name', 'raw_text', 'attitudes_count']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 457,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>raw_text</th>\n",
       "      <th>attitudes_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>92800</th>\n",
       "      <td>现在艺人公关真的是很厉害，能把嘲点转换为自己的亮点，就比如这个大碗宽面，这么一发想嘲的人可能...</td>\n",
       "      <td>43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27741</th>\n",
       "      <td>凡凡趁着坤坤这波居然完美洗白了？？关键的是歌还不错？！//@藤新Jiven:老吳這個可以，哈...</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1329</th>\n",
       "      <td>妙妙妙！大气又可爱！才华横溢我的大凡凡！瑞思拜[米妮爱你]#吴亦凡[超话]# [米奇比心][...</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>82171</th>\n",
       "      <td>自黑新高度[允悲]这心态不得不佩服！我凡真不是普通人的气度，爱了爱了！永远支持你呀！[心][...</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30017</th>\n",
       "      <td>出大事了 cxk要g了吗，凡少也开始玩怪东西了</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>64918</th>\n",
       "      <td>和律师函相比真的是高下立判了</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>84339</th>\n",
       "      <td>还是凡凡格局大[赞]cxk就是个弟弟</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5722</th>\n",
       "      <td>我的宝贝今天真的好棒[羞嗒嗒] 顺便瞎说一番 今天打开空间朋友圈全部都是对不起吴亦凡的发言 ...</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30014</th>\n",
       "      <td>我也欠吴老师一个道歉，起码是条会打篮球的刚烈汉子。</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50951</th>\n",
       "      <td>天蝎气场让人觉得有距离且紧绷给人霸道的假象，好多天蝎明星都经历过被人diss人设太霸道总裁太...</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                raw_text  attitudes_count\n",
       "92800  现在艺人公关真的是很厉害，能把嘲点转换为自己的亮点，就比如这个大碗宽面，这么一发想嘲的人可能...               43\n",
       "27741  凡凡趁着坤坤这波居然完美洗白了？？关键的是歌还不错？！//@藤新Jiven:老吳這個可以，哈...               42\n",
       "1329   妙妙妙！大气又可爱！才华横溢我的大凡凡！瑞思拜[米妮爱你]#吴亦凡[超话]# [米奇比心][...               37\n",
       "82171  自黑新高度[允悲]这心态不得不佩服！我凡真不是普通人的气度，爱了爱了！永远支持你呀！[心][...               21\n",
       "30017                            出大事了 cxk要g了吗，凡少也开始玩怪东西了               20\n",
       "64918                                     和律师函相比真的是高下立判了               17\n",
       "84339                                 还是凡凡格局大[赞]cxk就是个弟弟               16\n",
       "5722   我的宝贝今天真的好棒[羞嗒嗒] 顺便瞎说一番 今天打开空间朋友圈全部都是对不起吴亦凡的发言 ...               12\n",
       "30014                          我也欠吴老师一个道歉，起码是条会打篮球的刚烈汉子。               12\n",
       "50951  天蝎气场让人觉得有距离且紧绷给人霸道的假象，好多天蝎明星都经历过被人diss人设太霸道总裁太...               11"
      ]
     },
     "execution_count": 457,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_kun.loc[list(data_kun['attitudes_count'].nlargest(10).index), ['raw_text', 'attitudes_count']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 349,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(96018, 22)"
      ]
     },
     "execution_count": 349,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_true[data_true['raw_text']!=''].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 350,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.06487325293174197"
      ]
     },
     "execution_count": 350,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "6229/96018"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4. 有多少人开始路转粉了？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 346,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4777                                                  转发微博\n",
       "54202                                                 那个磊呢\n",
       "49000                                                     \n",
       "62390                                             卧槽，凡聪是真的\n",
       "84587                                                     \n",
       "95667    有人把综艺梗代入到你的音乐作品 你用自己的方式 放过他们和自己 对于今天的我来说 非常应景了...\n",
       "43513                                                 转发微博\n",
       "15210                                        只有我一个人想哭吗....\n",
       "58018    不过综艺随便的一个梗 走到哪都有人黑 也不知道为啥都拿来当祖训背 然后嘻嘻哈哈的 我哥真是太...\n",
       "87223                                                 姐姐晚安\n",
       "Name: clean_text, dtype: object"
      ]
     },
     "execution_count": 346,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_true['clean_text'].sample(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 351,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3646"
      ]
     },
     "execution_count": 351,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_true['raw_text'].str.contains('转粉|爱上|重新|路|圈粉|espect|瑞思拜').sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 362,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user.screen_name</th>\n",
       "      <th>raw_text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>89866</th>\n",
       "      <td>从没选对过</td>\n",
       "      <td>黑转路带点粉了，毕竟那么多人说咱像</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1744</th>\n",
       "      <td>玺欢侬吖</td>\n",
       "      <td>被圈粉了[喵喵]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1097</th>\n",
       "      <td>是灰灰呀是灰灰</td>\n",
       "      <td>圈粉</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31501</th>\n",
       "      <td>明明爱kris</td>\n",
       "      <td>#吴亦凡[超话]#//@PP音乐官方微博:我不得不瑞思拜 从去年的diss track 到今...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61191</th>\n",
       "      <td>蟹小排</td>\n",
       "      <td>哇，好听，转粉了呀！好有趣啊</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      user.screen_name                                           raw_text\n",
       "89866            从没选对过                                  黑转路带点粉了，毕竟那么多人说咱像\n",
       "1744              玺欢侬吖                                           被圈粉了[喵喵]\n",
       "1097           是灰灰呀是灰灰                                                 圈粉\n",
       "31501          明明爱kris  #吴亦凡[超话]#//@PP音乐官方微博:我不得不瑞思拜 从去年的diss track 到今...\n",
       "61191              蟹小排                                     哇，好听，转粉了呀！好有趣啊"
      ]
     },
     "execution_count": 362,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_true.loc[data_true['raw_text'].str.contains('转粉|爱上|重新|路|圈粉|espect|瑞思拜'),\n",
    "             ['user.screen_name', 'raw_text']].sample(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 363,
   "metadata": {},
   "outputs": [],
   "source": [
    "fans = data_true.loc[data_true['raw_text'].str.contains('转粉|爱上|重新|路|圈粉|espect|瑞思拜'), \n",
    "                     'user.gender'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 364,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<script>\n",
       "    require.config({\n",
       "        paths: {\n",
       "            'echarts': '/nbextensions/echarts/echarts.min'\n",
       "        }\n",
       "    });\n",
       "</script>\n",
       "    <div id=\"b78a0f5bc1374b49b9543e2d8d8f1188\" style=\"width:600px;height:500px;\"></div>\n",
       "\n",
       "\n",
       "<script>\n",
       "    require(['echarts'], function(echarts) {\n",
       "        \n",
       "var myChart_b78a0f5bc1374b49b9543e2d8d8f1188 = echarts.init(document.getElementById('b78a0f5bc1374b49b9543e2d8d8f1188'), null, {renderer: 'canvas'});\n",
       "var option_b78a0f5bc1374b49b9543e2d8d8f1188 = {\n",
       "    \"title\": [\n",
       "        {\n",
       "            \"text\": \"\\u8def\\u8f6c\\u7c89\\u7684\\u7537\\u5973\\u6027\\u522b\\u6bd4\\u4f8b\",\n",
       "            \"subtext\": \"\",\n",
       "            \"left\": \"auto\",\n",
       "            \"top\": \"auto\",\n",
       "            \"textStyle\": {\n",
       "                \"color\": \"#000\",\n",
       "                \"fontSize\": 18\n",
       "            },\n",
       "            \"subtextStyle\": {\n",
       "                \"color\": \"#aaa\",\n",
       "                \"fontSize\": 12\n",
       "            }\n",
       "        }\n",
       "    ],\n",
       "    \"toolbox\": {\n",
       "        \"show\": true,\n",
       "        \"orient\": \"vertical\",\n",
       "        \"left\": \"95%\",\n",
       "        \"top\": \"center\",\n",
       "        \"feature\": {\n",
       "            \"saveAsImage\": {\n",
       "                \"show\": true,\n",
       "                \"title\": \"\\u4e0b\\u8f7d\\u56fe\\u7247\"\n",
       "            },\n",
       "            \"restore\": {\n",
       "                \"show\": true\n",
       "            },\n",
       "            \"dataView\": {\n",
       "                \"show\": true\n",
       "            }\n",
       "        }\n",
       "    },\n",
       "    \"series_id\": 3018162,\n",
       "    \"tooltip\": {\n",
       "        \"trigger\": \"item\",\n",
       "        \"triggerOn\": \"mousemove|click\",\n",
       "        \"axisPointer\": {\n",
       "            \"type\": \"line\"\n",
       "        },\n",
       "        \"formatter\": null,\n",
       "        \"textStyle\": {\n",
       "            \"color\": \"#fff\",\n",
       "            \"fontSize\": 14\n",
       "        },\n",
       "        \"backgroundColor\": \"rgba(50,50,50,0.7)\",\n",
       "        \"borderColor\": \"#333\",\n",
       "        \"borderWidth\": 0\n",
       "    },\n",
       "    \"series\": [\n",
       "        {\n",
       "            \"type\": \"bar\",\n",
       "            \"name\": \"\",\n",
       "            \"data\": [\n",
       "                2441.0,\n",
       "                1205.0\n",
       "            ],\n",
       "            \"stack\": \"stack_3018162\",\n",
       "            \"barCategoryGap\": \"20%\",\n",
       "            \"label\": {\n",
       "                \"normal\": {\n",
       "                    \"show\": true,\n",
       "                    \"position\": \"top\",\n",
       "                    \"textStyle\": {\n",
       "                        \"color\": \"#000\",\n",
       "                        \"fontSize\": 12\n",
       "                    },\n",
       "                    \"formatter\": null\n",
       "                },\n",
       "                \"emphasis\": {\n",
       "                    \"show\": true,\n",
       "                    \"position\": null,\n",
       "                    \"textStyle\": {\n",
       "                        \"color\": \"#fff\",\n",
       "                        \"fontSize\": 12\n",
       "                    }\n",
       "                }\n",
       "            },\n",
       "            \"markPoint\": {\n",
       "                \"data\": []\n",
       "            },\n",
       "            \"markLine\": {\n",
       "                \"data\": []\n",
       "            },\n",
       "            \"seriesId\": 3018162\n",
       "        }\n",
       "    ],\n",
       "    \"legend\": [\n",
       "        {\n",
       "            \"data\": [\n",
       "                \"\"\n",
       "            ],\n",
       "            \"selectedMode\": \"multiple\",\n",
       "            \"show\": true,\n",
       "            \"left\": \"center\",\n",
       "            \"top\": \"top\",\n",
       "            \"orient\": \"horizontal\",\n",
       "            \"textStyle\": {\n",
       "                \"fontSize\": 12,\n",
       "                \"color\": \"#333\"\n",
       "            }\n",
       "        }\n",
       "    ],\n",
       "    \"backgroundColor\": \"#fff\",\n",
       "    \"xAxis\": [\n",
       "        {\n",
       "            \"name\": \"\",\n",
       "            \"show\": true,\n",
       "            \"nameLocation\": \"middle\",\n",
       "            \"nameGap\": 25,\n",
       "            \"nameTextStyle\": {\n",
       "                \"fontSize\": 14\n",
       "            },\n",
       "            \"axisLabel\": {\n",
       "                \"interval\": \"auto\",\n",
       "                \"rotate\": 0,\n",
       "                \"margin\": 8,\n",
       "                \"textStyle\": {\n",
       "                    \"fontSize\": 20,\n",
       "                    \"color\": \"#000\"\n",
       "                }\n",
       "            },\n",
       "            \"axisTick\": {\n",
       "                \"alignWithLabel\": false\n",
       "            },\n",
       "            \"inverse\": false,\n",
       "            \"position\": null,\n",
       "            \"boundaryGap\": true,\n",
       "            \"min\": null,\n",
       "            \"max\": null,\n",
       "            \"data\": [\n",
       "                \"\\u5973\",\n",
       "                \"\\u7537\"\n",
       "            ],\n",
       "            \"type\": \"category\"\n",
       "        }\n",
       "    ],\n",
       "    \"yAxis\": [\n",
       "        {\n",
       "            \"name\": \"\",\n",
       "            \"show\": true,\n",
       "            \"nameLocation\": \"middle\",\n",
       "            \"nameGap\": 25,\n",
       "            \"nameTextStyle\": {\n",
       "                \"fontSize\": 14\n",
       "            },\n",
       "            \"axisLabel\": {\n",
       "                \"formatter\": \"{value} \",\n",
       "                \"rotate\": 0,\n",
       "                \"interval\": \"auto\",\n",
       "                \"margin\": 8,\n",
       "                \"textStyle\": {\n",
       "                    \"fontSize\": 14,\n",
       "                    \"color\": \"#000\"\n",
       "                }\n",
       "            },\n",
       "            \"axisTick\": {\n",
       "                \"alignWithLabel\": false\n",
       "            },\n",
       "            \"inverse\": false,\n",
       "            \"position\": null,\n",
       "            \"boundaryGap\": true,\n",
       "            \"min\": null,\n",
       "            \"max\": null,\n",
       "            \"splitLine\": {\n",
       "                \"show\": true\n",
       "            },\n",
       "            \"type\": \"value\"\n",
       "        }\n",
       "    ],\n",
       "    \"color\": [\n",
       "        \"#c23531\",\n",
       "        \"#2f4554\",\n",
       "        \"#61a0a8\",\n",
       "        \"#d48265\",\n",
       "        \"#749f83\",\n",
       "        \"#ca8622\",\n",
       "        \"#bda29a\",\n",
       "        \"#6e7074\",\n",
       "        \"#546570\",\n",
       "        \"#c4ccd3\",\n",
       "        \"#f05b72\",\n",
       "        \"#ef5b9c\",\n",
       "        \"#f47920\",\n",
       "        \"#905a3d\",\n",
       "        \"#fab27b\",\n",
       "        \"#2a5caa\",\n",
       "        \"#444693\",\n",
       "        \"#726930\",\n",
       "        \"#b2d235\",\n",
       "        \"#6d8346\",\n",
       "        \"#ac6767\",\n",
       "        \"#1d953f\",\n",
       "        \"#6950a1\",\n",
       "        \"#918597\",\n",
       "        \"#f6f5ec\"\n",
       "    ]\n",
       "};\n",
       "myChart_b78a0f5bc1374b49b9543e2d8d8f1188.setOption(option_b78a0f5bc1374b49b9543e2d8d8f1188);\n",
       "\n",
       "    });\n",
       "</script>\n"
      ],
      "text/plain": [
       "<pyecharts.charts.bar.Bar at 0x125ad1780>"
      ]
     },
     "execution_count": 364,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bar = Bar(\"路转粉的男女性别比例\", width = 600,height=500)\n",
    "bar.add(\"\", ['女', '男'], fans.values, is_stack=True, \n",
    "       xaxis_label_textsize=20, yaxis_label_textsize=14, is_label_show=True)\n",
    "bar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 365,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "f    0.669501\n",
       "m    0.330499\n",
       "Name: user.gender, dtype: float64"
      ]
     },
     "execution_count": 365,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fans/fans.sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5. 评论的词云图"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 369,
   "metadata": {},
   "outputs": [],
   "source": [
    "import jieba\n",
    "from collections import Counter\n",
    "from pyecharts import WordCloud\n",
    "\n",
    "jieba.add_word('吴亦凡')\n",
    "jieba.add_word('蔡徐坤')\n",
    "jieba.add_word('ikun')\n",
    "jieba.add_word('凡凡')\n",
    "jieba.add_word('Kris')\n",
    "\n",
    "swords = [x.strip() for x in open ('stopwords.txt')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 373,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_word_cloud(data, swords):\n",
    "    text = ''.join(data)\n",
    "    words = list(jieba.cut(text))\n",
    "    ex_sw_words = []\n",
    "    for word in words:\n",
    "        if len(word)>1 and (word not in swords):\n",
    "            ex_sw_words.append(word)\n",
    "    c = Counter()\n",
    "    c = Counter(ex_sw_words)\n",
    "    wc_data = pd.DataFrame({'word':list(c.keys()), 'counts':list(c.values())}).sort_values(by='counts', ascending=False).head(100)\n",
    "    wordcloud = WordCloud(width=1300, height=620)\n",
    "    wordcloud.add(\"\", wc_data['word'], wc_data['counts'], word_size_range=[20, 100])\n",
    "    return wordcloud"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 375,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<script>\n",
       "    require.config({\n",
       "        paths: {\n",
       "            'echarts': '/nbextensions/echarts/echarts.min', 'wordcloud': '/nbextensions/echarts/echarts-wordcloud.min'\n",
       "        }\n",
       "    });\n",
       "</script>\n",
       "    <div id=\"7ad4c75a1eac486cb17be9bac78942b5\" style=\"width:1300px;height:620px;\"></div>\n",
       "\n",
       "\n",
       "<script>\n",
       "    require(['echarts', 'wordcloud'], function(echarts) {\n",
       "        \n",
       "var myChart_7ad4c75a1eac486cb17be9bac78942b5 = echarts.init(document.getElementById('7ad4c75a1eac486cb17be9bac78942b5'), null, {renderer: 'canvas'});\n",
       "var option_7ad4c75a1eac486cb17be9bac78942b5 = {\n",
       "    \"title\": [\n",
       "        {\n",
       "            \"text\": \"\",\n",
       "            \"subtext\": \"\",\n",
       "            \"left\": \"auto\",\n",
       "            \"top\": \"auto\",\n",
       "            \"textStyle\": {\n",
       "                \"color\": \"#000\",\n",
       "                \"fontSize\": 18\n",
       "            },\n",
       "            \"subtextStyle\": {\n",
       "                \"color\": \"#aaa\",\n",
       "                \"fontSize\": 12\n",
       "            }\n",
       "        }\n",
       "    ],\n",
       "    \"toolbox\": {\n",
       "        \"show\": true,\n",
       "        \"orient\": \"vertical\",\n",
       "        \"left\": \"95%\",\n",
       "        \"top\": \"center\",\n",
       "        \"feature\": {\n",
       "            \"saveAsImage\": {\n",
       "                \"show\": true,\n",
       "                \"title\": \"\\u4e0b\\u8f7d\\u56fe\\u7247\"\n",
       "            },\n",
       "            \"restore\": {\n",
       "                \"show\": true\n",
       "            },\n",
       "            \"dataView\": {\n",
       "                \"show\": true\n",
       "            }\n",
       "        }\n",
       "    },\n",
       "    \"series_id\": 119835,\n",
       "    \"tooltip\": {\n",
       "        \"trigger\": \"item\",\n",
       "        \"triggerOn\": \"mousemove|click\",\n",
       "        \"axisPointer\": {\n",
       "            \"type\": \"line\"\n",
       "        },\n",
       "        \"formatter\": null,\n",
       "        \"textStyle\": {\n",
       "            \"color\": \"#fff\",\n",
       "            \"fontSize\": 14\n",
       "        },\n",
       "        \"backgroundColor\": \"rgba(50,50,50,0.7)\",\n",
       "        \"borderColor\": \"#333\",\n",
       "        \"borderWidth\": 0\n",
       "    },\n",
       "    \"series\": [\n",
       "        {\n",
       "            \"type\": \"wordCloud\",\n",
       "            \"name\": \"\",\n",
       "            \"shape\": \"circle\",\n",
       "            \"rotationRange\": [\n",
       "                -90,\n",
       "                90\n",
       "            ],\n",
       "            \"rotationStep\": 45,\n",
       "            \"girdSize\": 20,\n",
       "            \"sizeRange\": [\n",
       "                20,\n",
       "                100\n",
       "            ],\n",
       "            \"data\": [\n",
       "                {\n",
       "                    \"name\": \"\\u54c8\\u54c8\\u54c8\",\n",
       "                    \"value\": 8076,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(59,11,47)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5434\\u4ea6\\u51e1\",\n",
       "                    \"value\": 6565,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(127,112,0)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u53ef\\u7231\",\n",
       "                    \"value\": 4775,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(143,58,94)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u597d\\u542c\",\n",
       "                    \"value\": 4770,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(124,93,29)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5927\\u7897\",\n",
       "                    \"value\": 4103,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(100,117,40)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5bbd\\u9762\",\n",
       "                    \"value\": 3753,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(126,92,89)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5f00\\u5fc3\",\n",
       "                    \"value\": 3439,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(80,77,14)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"doge\",\n",
       "                    \"value\": 3153,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(31,95,16)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u54c8\\u54c8\\u54c8\\u54c8\",\n",
       "                    \"value\": 2899,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(145,39,63)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u51e1\\u51e1\",\n",
       "                    \"value\": 2818,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(43,159,56)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5141\\u60b2\",\n",
       "                    \"value\": 2561,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(132,116,121)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u554a\\u554a\\u554a\",\n",
       "                    \"value\": 2546,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(93,127,60)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u563b\\u563b\",\n",
       "                    \"value\": 2518,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(12,20,77)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u54e5\\u54e5\",\n",
       "                    \"value\": 2343,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(12,107,45)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5403\\u9762\",\n",
       "                    \"value\": 2142,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(105,91,51)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u559c\\u6b22\",\n",
       "                    \"value\": 2039,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(131,3,108)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"good\",\n",
       "                    \"value\": 2036,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(67,111,150)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"cry\",\n",
       "                    \"value\": 2000,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(92,19,155)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u4f5c\\u63d6\",\n",
       "                    \"value\": 1894,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(43,26,37)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u6709\\u70b9\",\n",
       "                    \"value\": 1886,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(136,98,78)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u8d85\\u8bdd\",\n",
       "                    \"value\": 1757,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(154,11,22)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5c0f\\u5fc3\",\n",
       "                    \"value\": 1661,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(73,17,74)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u6709\\u8da3\",\n",
       "                    \"value\": 1626,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(138,32,152)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u9f13\\u638c\",\n",
       "                    \"value\": 1594,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(62,126,87)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5148\\u751f\",\n",
       "                    \"value\": 1489,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(61,21,27)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"Mr\",\n",
       "                    \"value\": 1237,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(93,98,82)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u61a7\\u61ac\",\n",
       "                    \"value\": 1170,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(55,145,63)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u51e1\\u54e5\",\n",
       "                    \"value\": 1113,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(65,115,113)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u4e00\\u7537\",\n",
       "                    \"value\": 1086,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(31,143,123)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u9e7f\\u6657\",\n",
       "                    \"value\": 1076,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(21,25,127)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u4e8c\\u54c8\",\n",
       "                    \"value\": 1063,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(15,58,73)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u751f\\u65e5\\u5feb\\u4e50\",\n",
       "                    \"value\": 1031,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(59,135,44)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u660e\\u5929\",\n",
       "                    \"value\": 996,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(151,61,26)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5927\\u53c8\\u5706\",\n",
       "                    \"value\": 988,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(67,96,36)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u52a0\\u6cb9\",\n",
       "                    \"value\": 981,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(131,63,21)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u7897\\u9762\",\n",
       "                    \"value\": 958,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(113,17,58)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u4e0d\\u9519\",\n",
       "                    \"value\": 955,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(42,146,54)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u8001\\u5e08\",\n",
       "                    \"value\": 945,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(85,78,94)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u7c73\\u5947\",\n",
       "                    \"value\": 877,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(119,75,11)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u6bd4\\u5fc3\",\n",
       "                    \"value\": 872,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(92,15,135)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"skr\",\n",
       "                    \"value\": 861,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(96,70,90)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u4eca\\u5929\",\n",
       "                    \"value\": 844,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(67,68,45)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u9996\\u6b4c\",\n",
       "                    \"value\": 835,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(27,120,108)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u7f9e\\u55d2\",\n",
       "                    \"value\": 752,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(157,43,41)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u8f6c\\u7c89\",\n",
       "                    \"value\": 748,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(117,156,94)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u998b\\u5634\",\n",
       "                    \"value\": 696,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(139,82,10)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u6f14\\u5531\\u4f1a\",\n",
       "                    \"value\": 687,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(140,33,27)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u97f3\\u4e50\",\n",
       "                    \"value\": 658,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(90,59,58)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u7b80\\u5355\",\n",
       "                    \"value\": 653,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(113,0,32)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5bf9\\u4e0d\\u8d77\",\n",
       "                    \"value\": 632,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(23,105,133)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5feb\\u4e50\",\n",
       "                    \"value\": 607,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(31,140,103)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"respect\",\n",
       "                    \"value\": 601,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(102,35,150)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u4e2d\\u56fd\",\n",
       "                    \"value\": 598,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(41,117,70)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u8fd9\\u6ce2\",\n",
       "                    \"value\": 580,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(131,5,89)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"ok\",\n",
       "                    \"value\": 574,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(26,27,100)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5389\\u5bb3\",\n",
       "                    \"value\": 571,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(84,20,48)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5154\\u5b50\",\n",
       "                    \"value\": 569,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(158,107,149)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"haha\",\n",
       "                    \"value\": 568,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(34,5,160)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u6709\\u610f\\u601d\",\n",
       "                    \"value\": 567,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(78,158,101)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u683c\\u5c40\",\n",
       "                    \"value\": 540,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(11,28,16)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u652f\\u6301\",\n",
       "                    \"value\": 537,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(132,95,139)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5fae\\u7b11\",\n",
       "                    \"value\": 535,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(123,24,73)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u4e0d\\u8bed\",\n",
       "                    \"value\": 524,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(98,120,63)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u4e00\\u8d77\",\n",
       "                    \"value\": 521,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(66,87,86)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u9762\\u5b83\",\n",
       "                    \"value\": 504,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(95,47,14)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u52a8\\u753b\",\n",
       "                    \"value\": 495,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(27,69,116)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u725b\\u6843\",\n",
       "                    \"value\": 494,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(77,141,17)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u745e\\u601d\\u62dc\",\n",
       "                    \"value\": 479,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(14,103,69)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u59d0\\u59d0\",\n",
       "                    \"value\": 469,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(98,59,155)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5f8b\\u5e08\\u51fd\",\n",
       "                    \"value\": 466,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(27,78,157)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u4eb2\\u4eb2\",\n",
       "                    \"value\": 466,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(81,140,87)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"##\",\n",
       "                    \"value\": 458,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(37,152,73)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u8c22\\u8c22\",\n",
       "                    \"value\": 454,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(126,120,140)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5077\\u7b11\",\n",
       "                    \"value\": 454,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(68,8,38)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u771f\\u662f\",\n",
       "                    \"value\": 445,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(112,41,58)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u786e\\u5b9e\",\n",
       "                    \"value\": 442,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(106,140,140)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u77e5\\u9053\",\n",
       "                    \"value\": 434,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(100,57,52)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u597d\\u597d\",\n",
       "                    \"value\": 432,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(114,18,87)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u610f\\u601d\",\n",
       "                    \"value\": 430,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(4,121,79)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u7537\\u4eba\",\n",
       "                    \"value\": 428,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(147,81,117)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"mv\",\n",
       "                    \"value\": 424,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(156,152,18)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u73b0\\u5728\",\n",
       "                    \"value\": 419,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(97,3,148)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u545c\\u545c\",\n",
       "                    \"value\": 417,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(54,86,54)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u4f18\\u79c0\",\n",
       "                    \"value\": 416,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(26,130,38)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"2019\",\n",
       "                    \"value\": 404,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(129,149,130)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u671f\\u5f85\",\n",
       "                    \"value\": 390,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(128,153,73)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u8521\\u5f90\\u5764\",\n",
       "                    \"value\": 384,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(141,30,110)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u64cd\\u4f5c\",\n",
       "                    \"value\": 377,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(109,43,159)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u6001\\u5ea6\",\n",
       "                    \"value\": 372,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(117,150,160)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u4f69\\u670d\",\n",
       "                    \"value\": 371,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(116,72,84)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u60c5\\u5546\",\n",
       "                    \"value\": 371,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(40,123,14)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5fae\\u535a\",\n",
       "                    \"value\": 366,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(156,127,125)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u9c9c\\u82b1\",\n",
       "                    \"value\": 364,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(141,154,107)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"Repost\",\n",
       "                    \"value\": 364,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(109,58,123)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u4e00\\u7897\",\n",
       "                    \"value\": 364,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(126,146,33)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u4e00\\u4e0b\",\n",
       "                    \"value\": 362,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(68,140,124)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u65b0\\u6b4c\",\n",
       "                    \"value\": 361,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(153,60,21)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5de1\\u56de\\u6f14\\u5531\",\n",
       "                    \"value\": 360,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(11,117,79)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u5faa\\u73af\",\n",
       "                    \"value\": 348,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(27,105,23)\"\n",
       "                        }\n",
       "                    }\n",
       "                },\n",
       "                {\n",
       "                    \"name\": \"\\u7a81\\u7136\",\n",
       "                    \"value\": 348,\n",
       "                    \"textStyle\": {\n",
       "                        \"normal\": {\n",
       "                            \"color\": \"rgb(149,98,36)\"\n",
       "                        }\n",
       "                    }\n",
       "                }\n",
       "            ]\n",
       "        }\n",
       "    ],\n",
       "    \"legend\": [\n",
       "        {\n",
       "            \"data\": [],\n",
       "            \"selectedMode\": \"multiple\",\n",
       "            \"show\": true,\n",
       "            \"left\": \"center\",\n",
       "            \"top\": \"top\",\n",
       "            \"orient\": \"horizontal\",\n",
       "            \"textStyle\": {\n",
       "                \"fontSize\": 12,\n",
       "                \"color\": \"#333\"\n",
       "            }\n",
       "        }\n",
       "    ],\n",
       "    \"backgroundColor\": \"#fff\",\n",
       "    \"color\": [\n",
       "        \"#c23531\",\n",
       "        \"#2f4554\",\n",
       "        \"#61a0a8\",\n",
       "        \"#d48265\",\n",
       "        \"#749f83\",\n",
       "        \"#ca8622\",\n",
       "        \"#bda29a\",\n",
       "        \"#6e7074\",\n",
       "        \"#546570\",\n",
       "        \"#c4ccd3\",\n",
       "        \"#f05b72\",\n",
       "        \"#ef5b9c\",\n",
       "        \"#f47920\",\n",
       "        \"#905a3d\",\n",
       "        \"#fab27b\",\n",
       "        \"#2a5caa\",\n",
       "        \"#444693\",\n",
       "        \"#726930\",\n",
       "        \"#b2d235\",\n",
       "        \"#6d8346\",\n",
       "        \"#ac6767\",\n",
       "        \"#1d953f\",\n",
       "        \"#6950a1\",\n",
       "        \"#918597\",\n",
       "        \"#f6f5ec\"\n",
       "    ]\n",
       "};\n",
       "myChart_7ad4c75a1eac486cb17be9bac78942b5.setOption(option_7ad4c75a1eac486cb17be9bac78942b5);\n",
       "\n",
       "    });\n",
       "</script>\n"
      ],
      "text/plain": [
       "<pyecharts.charts.wordcloud.WordCloud at 0x125a10668>"
      ]
     },
     "execution_count": 375,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "plot_word_cloud(data=data_true.loc[-data_true['clean_text'].str.contains(\"转发\"), 'clean_text'], swords=swords)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
